Re: [Bug-wget] [PATCH] Small fix for limited number of strings (and potential garbage value) in arguments to concat_strings

2014-10-29 Thread Tim Ruehsen
On Monday 27 October 2014 23:57:00 Pär Karlsson wrote:
 No complaints from me, it applied cleanly, all tests OK. :-)
 
 2014-10-27 21:53 GMT+01:00 Tim Rühsen tim.rueh...@gmx.de:
  Am Samstag, 25. Oktober 2014, 11:09:48 schrieb Pär Karlsson:
   Good point. I got carried away by the criticism against xrealloc being
  
  too
  
   expensive, etc, but you are of course right. Your implementation seems
   to
   me the most readable, and in this context, it's what matters. :-)
   
   Thanks for the tip re: callgrind :-)
   
   /Pär
   
   2014-10-24 20:39 GMT+02:00 Tim Rühsen tim.rueh...@gmx.de:
Pär, thanks for your work.

*BUT* speed does not really matter here. My intention was to show code
that is
much more readable than the current implementation which is
unnecessary
complicated and not even understandable by the LLVM/clang analyzer.
That piece of code should be replaced.

FYI, if you want to work on CPU cycle optimization, use valgrind --
tool=callgrind for your wget command to be polished. The resulting
file
can be
viewed with e.g. kcachegrind.

Tim
  
  i made up a patch.
  
  Any complaints ?
  
  Tim

Pushed.

Tim

signature.asc
Description: This is a digitally signed message part.


Re: [Bug-wget] [PATCH] Small fix for limited number of strings (and potential garbage value) in arguments to concat_strings

2014-10-27 Thread Tim Rühsen
Am Samstag, 25. Oktober 2014, 11:09:48 schrieb Pär Karlsson:
 Good point. I got carried away by the criticism against xrealloc being too
 expensive, etc, but you are of course right. Your implementation seems to
 me the most readable, and in this context, it's what matters. :-)

 Thanks for the tip re: callgrind :-)

 /Pär

 2014-10-24 20:39 GMT+02:00 Tim Rühsen tim.rueh...@gmx.de:
  Pär, thanks for your work.
 
  *BUT* speed does not really matter here. My intention was to show code
  that is
  much more readable than the current implementation which is unnecessary
  complicated and not even understandable by the LLVM/clang analyzer.
  That piece of code should be replaced.
 
  FYI, if you want to work on CPU cycle optimization, use valgrind --
  toolÊllgrind for your wget command to be polished. The resulting file
  can be
  viewed with e.g. kcachegrind.
 
  Tim

i made up a patch.

Any complaints ?

Tim
From cd7ed2dc2251913fc55398de2057713f2ba1e1e6 Mon Sep 17 00:00:00 2001
From: Tim Ruehsen tim.rueh...@gmx.de
Date: Sat, 25 Oct 2014 21:03:05 +0200
Subject: [PATCH 1/2] added strlcpy(), concat_strings() rewritten

Signed-off-by: Tim Ruehsen tim.rueh...@gmx.de
---
 ChangeLog |  6 -
 configure.ac  |  2 +-
 src/ChangeLog |  6 +
 src/utils.c   | 71 +--
 src/utils.h   |  4 
 5 files changed, 56 insertions(+), 33 deletions(-)

diff --git a/ChangeLog b/ChangeLog
index eca59da..51644a1 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,4 +1,8 @@
-2013-10-22  Ángel González keis...@gmail.com
+2014-10-25  Tim Ruehsen tim.rueh...@gmx.de
+
+	* configure.ac: check for strlcpy()
+
+2014-10-22  Ángel González keis...@gmail.com

 	* bootstrap.conf (gnulib_modules): Add module xstrndup.

diff --git a/configure.ac b/configure.ac
index 3cbe618..56a4767 100644
--- a/configure.ac
+++ b/configure.ac
@@ -207,7 +207,7 @@ AC_FUNC_MMAP
 AC_FUNC_FSEEKO
 AC_CHECK_FUNCS(strptime timegm vsnprintf vasprintf drand48 pathconf)
 AC_CHECK_FUNCS(strtoll usleep ftello sigblock sigsetjmp memrchr wcwidth mbtowc)
-AC_CHECK_FUNCS(sleep symlink utime)
+AC_CHECK_FUNCS(sleep symlink utime strlcpy)

 if test x$ENABLE_OPIE = xyes; then
   AC_LIBOBJ([ftp-opie])
diff --git a/src/ChangeLog b/src/ChangeLog
index d0f753f..c58a475 100644
--- a/src/ChangeLog
+++ b/src/ChangeLog
@@ -1,3 +1,9 @@
+ HEAD
+2014-10-25  Tim Ruehsen  tim.rueh...@gmx.de
+
+	* utils.c: added strlcpy(), concat_strings() rewritten
+	* utils.h: added strlcpy()
+
 2014-09-08  Darshit Shah  dar...@gmail.com

 	* ftp.c (ftp_retrieve_glob): Also check for invalid entries along with
diff --git a/src/utils.c b/src/utils.c
index 78c282e..3280294 100644
--- a/src/utils.c
+++ b/src/utils.c
@@ -349,6 +349,32 @@ aprintf (const char *fmt, ...)
 #endif /* not HAVE_VASPRINTF */
 }

+#ifndef HAVE_STRLCPY
+/* strlcpy() is a BSD function that sometimes is really handy.
+ * It is the same as snprintf(dst,dstsize,%s,src), but much faster. */
+
+size_t
+strlcpy (char *dst, const char *src, size_t size)
+{
+  const char *old = src;
+
+  /* Copy as many bytes as will fit */
+  if (size)
+{
+  while (--size)
+{
+  if (!(*dst++ = *src++))
+return src - old - 1;
+}
+
+  *dst = 0;
+}
+
+  while (*src++);
+  return src - old - 1;
+}
+#endif
+
 /* Concatenate the NULL-terminated list of string arguments into
freshly allocated space.  */

@@ -356,47 +382,30 @@ char *
 concat_strings (const char *str0, ...)
 {
   va_list args;
-  int saved_lengths[5]; /* inspired by Apache's apr_pstrcat */
-  char *ret, *p;
-
-  const char *next_str;
-  int total_length = 0;
-  size_t argcount;
+  const char *arg;
+  size_t length = 0, pos = 0;
+  char *s;

-  /* Calculate the length of and allocate the resulting string. */
+  if (!str0)
+return NULL;

-  argcount = 0;
+  /* calculate the length of the resulting string */
   va_start (args, str0);
-  for (next_str = str0; next_str != NULL; next_str = va_arg (args, char *))
-{
-  int len = strlen (next_str);
-  if (argcount  countof (saved_lengths))
-saved_lengths[argcount++] = len;
-  total_length += len;
-}
+  for (arg = str0; arg; arg = va_arg (args, const char *))
+length += strlen(arg);
   va_end (args);
-  p = ret = xmalloc (total_length + 1);

-  /* Copy the strings into the allocated space. */
+  s = xmalloc (length + 1);

-  argcount = 0;
+  /* concatenate strings */
   va_start (args, str0);
-  for (next_str = str0; next_str != NULL; next_str = va_arg (args, char *))
-{
-  int len;
-  if (argcount  countof (saved_lengths))
-len = saved_lengths[argcount++];
-  else
-len = strlen (next_str);
-  memcpy (p, next_str, len);
-  p += len;
-}
+  for (arg = str0; arg; arg = va_arg (args, const char *))
+pos += strlcpy(s + pos, arg, length - pos + 1);
   va_end (args);
-  *p = '\0';

-  return ret;
+  return s;
 }
-
+
 /* Format the provided time according to the specified 

Re: [Bug-wget] [PATCH] Small fix for limited number of strings (and potential garbage value) in arguments to concat_strings

2014-10-27 Thread Pär Karlsson
No complaints from me, it applied cleanly, all tests OK. :-)

2014-10-27 21:53 GMT+01:00 Tim Rühsen tim.rueh...@gmx.de:

 Am Samstag, 25. Oktober 2014, 11:09:48 schrieb Pär Karlsson:
  Good point. I got carried away by the criticism against xrealloc being
 too
  expensive, etc, but you are of course right. Your implementation seems to
  me the most readable, and in this context, it's what matters. :-)
 
  Thanks for the tip re: callgrind :-)
 
  /Pär
 
  2014-10-24 20:39 GMT+02:00 Tim Rühsen tim.rueh...@gmx.de:
   Pär, thanks for your work.
  
   *BUT* speed does not really matter here. My intention was to show code
   that is
   much more readable than the current implementation which is unnecessary
   complicated and not even understandable by the LLVM/clang analyzer.
   That piece of code should be replaced.
  
   FYI, if you want to work on CPU cycle optimization, use valgrind --
   tool=callgrind for your wget command to be polished. The resulting file
   can be
   viewed with e.g. kcachegrind.
  
   Tim

 i made up a patch.

 Any complaints ?

 Tim



Re: [Bug-wget] [PATCH] Small fix for limited number of strings (and potential garbage value) in arguments to concat_strings

2014-10-25 Thread Pär Karlsson
Good point. I got carried away by the criticism against xrealloc being too
expensive, etc, but you are of course right. Your implementation seems to
me the most readable, and in this context, it's what matters. :-)

Thanks for the tip re: callgrind :-)

/Pär

2014-10-24 20:39 GMT+02:00 Tim Rühsen tim.rueh...@gmx.de:

 Pär, thanks for your work.

 *BUT* speed does not really matter here. My intention was to show code
 that is
 much more readable than the current implementation which is unnecessary
 complicated and not even understandable by the LLVM/clang analyzer.
 That piece of code should be replaced.

 FYI, if you want to work on CPU cycle optimization, use valgrind --
 tool=callgrind for your wget command to be polished. The resulting file
 can be
 viewed with e.g. kcachegrind.

 Tim


 Am Freitag, 24. Oktober 2014, 19:27:43 schrieb Pär Karlsson:
  Well, I wrote a little benchmark and implemented them all in a hastily
  thrown together little program.
 
  Basically, it tests three different strings against all three
  implementations of the functions (but it uses malloc instead of xmalloc,
  because I didn't want to throw in the whole of gnulib there too):
 
  The results from 1 million iterations on each string (on an AMD
 Athlon(tm)
  Dual Core Processor 4850e) are as follows:
 
  feinorgh@elektra ~/code $ make strtest
  cc strtest.c   -o strtest
  feinorgh@elektra ~/code $ ./strtest
  Result concat_strings: avg 0.0018 s, total 0.18253400 s
  Result concat_strings_new: avg 0.0025 s, total 0.25143400 s
  Result concat_strings_tim: avg 0.0036 s, total 0.36166900 s
  Result concat_strings: avg 0.0015 s, total 0.15230500 s
  Result concat_strings_new: avg 0.0022 s, total 0.22280100 s
  Result concat_strings_tim: avg 0.0027 s, total 0.27337900 s
  Result concat_strings: avg 0.0073 s, total 0.72672500 s
  Result concat_strings_new: avg 0.0066 s, total 0.66113200 s
  Result concat_strings_tim: avg 0.0148 s, total 1.47995200 s
  feinorgh@elektra ~/code $ gcc -O3 -march=native -o strtest strtest.c
 -Wall
  feinorgh@elektra ~/code $ ./strtest
  Result concat_strings: avg 0.0013 s, total 0.12796300 s
  Result concat_strings_new: avg 0.0020 s, total 0.20472700 s
  Result concat_strings_tim: avg 0.0016 s, total 0.16481100 s
  Result concat_strings: avg 0.0011 s, total 0.10742400 s
  Result concat_strings_new: avg 0.0018 s, total 0.18417900 s
  Result concat_strings_tim: avg 0.0013 s, total 0.12504500 s
  Result concat_strings: avg 0.0059 s, total 0.58840200 s
  Result concat_strings_new: avg 0.0054 s, total 0.53843800 s
  Result concat_strings_tim: avg 0.0068 s, total 0.68416400 s
  feinorgh@elektra ~/code $ gcc -g -march=native -o strtest strtest.c
 -Wall
  feinorgh@elektra ~/code $ valgrind ./strtest
  ==3802== Memcheck, a memory error detector
  ==3802== Copyright (C) 2002-2013, and GNU GPL'd, by Julian Seward et al.
  ==3802== Using Valgrind-3.9.0 and LibVEX; rerun with -h for copyright
 info
  ==3802== Command: ./strtest
  ==3802==
  Result concat_strings: avg 0.0754 s, total 7.54378500 s
  Result concat_strings_new: avg 0.1080 s, total 10.80041900 s
  Result concat_strings_tim: avg 0.0900 s, total 8.99982200 s
  Result concat_strings: avg 0.0652 s, total 6.52148200 s
  Result concat_strings_new: avg 0.0985 s, total 9.85281200 s
  Result concat_strings_tim: avg 0.0770 s, total 7.69649600 s
  Result concat_strings: avg 0.2109 s, total 21.08910100 s
  Result concat_strings_new: avg 0.2570 s, total 25.69685100 s
  Result concat_strings_tim: avg 0.2541 s, total 25.40761200 s
  ==3802==
  ==3802== HEAP SUMMARY:
  ==3802== in use at exit: 0 bytes in 0 blocks
  ==3802==   total heap usage: 13,000,000 allocs, 13,000,000 frees,
  743,000,000 bytes allocated
  ==3802==
  ==3802== All heap blocks were freed -- no leaks are possible
  ==3802==
  ==3802== For counts of detected and suppressed errors, rerun with: -v
  ==3802== ERROR SUMMARY: 0 errors from 0 contexts (suppressed: 1 from 1)
 
  All three implementations perform correctly as far as I can tell (they
 give
  identical results), and behave well regarding memory management (as long
 as
  the allocated space is free():d afterwards).
 
  I tested it with gperf too, and it kind of indicated the same results,
 the
  original implementation is the fastest.
 
  I'm almost ashamed to attach the benchmark program, since it's so
 clumsily
  (and copy-pastily) put together, but for completeness, I attach it anyway
  (strtest.c.gz).
 
  My conclusion is: the current implementation is the best and most
  efficient; it does everything it's supposed to do, and it does it quicker
  (at least on my machine) than any of the given alternatives. For 5
  arguments or less, it's the most efficient implementation, and nowhere in
  the current codebase is there a place where more than 5 strings are
 used...
  to my knowledge :-) 

Re: [Bug-wget] [PATCH] Small fix for limited number of strings (and potential garbage value) in arguments to concat_strings

2014-10-24 Thread Pär Karlsson
Well, I wrote a little benchmark and implemented them all in a hastily
thrown together little program.

Basically, it tests three different strings against all three
implementations of the functions (but it uses malloc instead of xmalloc,
because I didn't want to throw in the whole of gnulib there too):

The results from 1 million iterations on each string (on an AMD Athlon(tm)
Dual Core Processor 4850e) are as follows:

feinorgh@elektra ~/code $ make strtest
cc strtest.c   -o strtest
feinorgh@elektra ~/code $ ./strtest
Result concat_strings: avg 0.0018 s, total 0.18253400 s
Result concat_strings_new: avg 0.0025 s, total 0.25143400 s
Result concat_strings_tim: avg 0.0036 s, total 0.36166900 s
Result concat_strings: avg 0.0015 s, total 0.15230500 s
Result concat_strings_new: avg 0.0022 s, total 0.22280100 s
Result concat_strings_tim: avg 0.0027 s, total 0.27337900 s
Result concat_strings: avg 0.0073 s, total 0.72672500 s
Result concat_strings_new: avg 0.0066 s, total 0.66113200 s
Result concat_strings_tim: avg 0.0148 s, total 1.47995200 s
feinorgh@elektra ~/code $ gcc -O3 -march=native -o strtest strtest.c -Wall
feinorgh@elektra ~/code $ ./strtest
Result concat_strings: avg 0.0013 s, total 0.12796300 s
Result concat_strings_new: avg 0.0020 s, total 0.20472700 s
Result concat_strings_tim: avg 0.0016 s, total 0.16481100 s
Result concat_strings: avg 0.0011 s, total 0.10742400 s
Result concat_strings_new: avg 0.0018 s, total 0.18417900 s
Result concat_strings_tim: avg 0.0013 s, total 0.12504500 s
Result concat_strings: avg 0.0059 s, total 0.58840200 s
Result concat_strings_new: avg 0.0054 s, total 0.53843800 s
Result concat_strings_tim: avg 0.0068 s, total 0.68416400 s
feinorgh@elektra ~/code $ gcc -g -march=native -o strtest strtest.c -Wall
feinorgh@elektra ~/code $ valgrind ./strtest
==3802== Memcheck, a memory error detector
==3802== Copyright (C) 2002-2013, and GNU GPL'd, by Julian Seward et al.
==3802== Using Valgrind-3.9.0 and LibVEX; rerun with -h for copyright info
==3802== Command: ./strtest
==3802==
Result concat_strings: avg 0.0754 s, total 7.54378500 s
Result concat_strings_new: avg 0.1080 s, total 10.80041900 s
Result concat_strings_tim: avg 0.0900 s, total 8.99982200 s
Result concat_strings: avg 0.0652 s, total 6.52148200 s
Result concat_strings_new: avg 0.0985 s, total 9.85281200 s
Result concat_strings_tim: avg 0.0770 s, total 7.69649600 s
Result concat_strings: avg 0.2109 s, total 21.08910100 s
Result concat_strings_new: avg 0.2570 s, total 25.69685100 s
Result concat_strings_tim: avg 0.2541 s, total 25.40761200 s
==3802==
==3802== HEAP SUMMARY:
==3802== in use at exit: 0 bytes in 0 blocks
==3802==   total heap usage: 13,000,000 allocs, 13,000,000 frees,
743,000,000 bytes allocated
==3802==
==3802== All heap blocks were freed -- no leaks are possible
==3802==
==3802== For counts of detected and suppressed errors, rerun with: -v
==3802== ERROR SUMMARY: 0 errors from 0 contexts (suppressed: 1 from 1)

All three implementations perform correctly as far as I can tell (they give
identical results), and behave well regarding memory management (as long as
the allocated space is free():d afterwards).

I tested it with gperf too, and it kind of indicated the same results, the
original implementation is the fastest.

I'm almost ashamed to attach the benchmark program, since it's so clumsily
(and copy-pastily) put together, but for completeness, I attach it anyway
(strtest.c.gz).

My conclusion is: the current implementation is the best and most
efficient; it does everything it's supposed to do, and it does it quicker
(at least on my machine) than any of the given alternatives. For 5
arguments or less, it's the most efficient implementation, and nowhere in
the current codebase is there a place where more than 5 strings are used...
to my knowledge :-) There's really nothing wrong with it, IMO :-)

Best regards,

/Pär

2014-10-23 22:01 GMT+02:00 Tim Rühsen tim.rueh...@gmx.de:

 Am Dienstag, 21. Oktober 2014, 16:49:12 schrieb Yousong Zhou:
  On 21 October 2014 16:17, Pär Karlsson feino...@gmail.com wrote:
   Yes, you are right, of course. Looking through the original
 implementation
   again, it seems water tight. clang probably complains about the
   uninitialized values above argcount in saved_lengths[], that are never
   reached.
  
   The precalculated strlen:s saved is likely only an optimization(?)
   attempt,
   I suppose.
 
  Yes. Grepping through the code shows that currently there is no
  invocation of concat_strings() having more than 5 arguments.
 
   Still, it seems wasteful to set up two complete loops with va_arg, and
   considering what this function actually does, I wonder if not
 s(n)printf
   should be used instead of this function? :-)
 
  I think concat_strings() is more tight and readable than multiple
  strlen() + malloc() + 

Re: [Bug-wget] [PATCH] Small fix for limited number of strings (and potential garbage value) in arguments to concat_strings

2014-10-24 Thread Tim Rühsen
Pär, thanks for your work.

*BUT* speed does not really matter here. My intention was to show code that is 
much more readable than the current implementation which is unnecessary 
complicated and not even understandable by the LLVM/clang analyzer.
That piece of code should be replaced.

FYI, if you want to work on CPU cycle optimization, use valgrind --
tool=callgrind for your wget command to be polished. The resulting file can be 
viewed with e.g. kcachegrind.

Tim


Am Freitag, 24. Oktober 2014, 19:27:43 schrieb Pär Karlsson:
 Well, I wrote a little benchmark and implemented them all in a hastily
 thrown together little program.
 
 Basically, it tests three different strings against all three
 implementations of the functions (but it uses malloc instead of xmalloc,
 because I didn't want to throw in the whole of gnulib there too):
 
 The results from 1 million iterations on each string (on an AMD Athlon(tm)
 Dual Core Processor 4850e) are as follows:
 
 feinorgh@elektra ~/code $ make strtest
 cc strtest.c   -o strtest
 feinorgh@elektra ~/code $ ./strtest
 Result concat_strings: avg 0.0018 s, total 0.18253400 s
 Result concat_strings_new: avg 0.0025 s, total 0.25143400 s
 Result concat_strings_tim: avg 0.0036 s, total 0.36166900 s
 Result concat_strings: avg 0.0015 s, total 0.15230500 s
 Result concat_strings_new: avg 0.0022 s, total 0.22280100 s
 Result concat_strings_tim: avg 0.0027 s, total 0.27337900 s
 Result concat_strings: avg 0.0073 s, total 0.72672500 s
 Result concat_strings_new: avg 0.0066 s, total 0.66113200 s
 Result concat_strings_tim: avg 0.0148 s, total 1.47995200 s
 feinorgh@elektra ~/code $ gcc -O3 -march=native -o strtest strtest.c -Wall
 feinorgh@elektra ~/code $ ./strtest
 Result concat_strings: avg 0.0013 s, total 0.12796300 s
 Result concat_strings_new: avg 0.0020 s, total 0.20472700 s
 Result concat_strings_tim: avg 0.0016 s, total 0.16481100 s
 Result concat_strings: avg 0.0011 s, total 0.10742400 s
 Result concat_strings_new: avg 0.0018 s, total 0.18417900 s
 Result concat_strings_tim: avg 0.0013 s, total 0.12504500 s
 Result concat_strings: avg 0.0059 s, total 0.58840200 s
 Result concat_strings_new: avg 0.0054 s, total 0.53843800 s
 Result concat_strings_tim: avg 0.0068 s, total 0.68416400 s
 feinorgh@elektra ~/code $ gcc -g -march=native -o strtest strtest.c -Wall
 feinorgh@elektra ~/code $ valgrind ./strtest
 ==3802== Memcheck, a memory error detector
 ==3802== Copyright (C) 2002-2013, and GNU GPL'd, by Julian Seward et al.
 ==3802== Using Valgrind-3.9.0 and LibVEX; rerun with -h for copyright info
 ==3802== Command: ./strtest
 ==3802==
 Result concat_strings: avg 0.0754 s, total 7.54378500 s
 Result concat_strings_new: avg 0.1080 s, total 10.80041900 s
 Result concat_strings_tim: avg 0.0900 s, total 8.99982200 s
 Result concat_strings: avg 0.0652 s, total 6.52148200 s
 Result concat_strings_new: avg 0.0985 s, total 9.85281200 s
 Result concat_strings_tim: avg 0.0770 s, total 7.69649600 s
 Result concat_strings: avg 0.2109 s, total 21.08910100 s
 Result concat_strings_new: avg 0.2570 s, total 25.69685100 s
 Result concat_strings_tim: avg 0.2541 s, total 25.40761200 s
 ==3802==
 ==3802== HEAP SUMMARY:
 ==3802== in use at exit: 0 bytes in 0 blocks
 ==3802==   total heap usage: 13,000,000 allocs, 13,000,000 frees,
 743,000,000 bytes allocated
 ==3802==
 ==3802== All heap blocks were freed -- no leaks are possible
 ==3802==
 ==3802== For counts of detected and suppressed errors, rerun with: -v
 ==3802== ERROR SUMMARY: 0 errors from 0 contexts (suppressed: 1 from 1)
 
 All three implementations perform correctly as far as I can tell (they give
 identical results), and behave well regarding memory management (as long as
 the allocated space is free():d afterwards).
 
 I tested it with gperf too, and it kind of indicated the same results, the
 original implementation is the fastest.
 
 I'm almost ashamed to attach the benchmark program, since it's so clumsily
 (and copy-pastily) put together, but for completeness, I attach it anyway
 (strtest.c.gz).
 
 My conclusion is: the current implementation is the best and most
 efficient; it does everything it's supposed to do, and it does it quicker
 (at least on my machine) than any of the given alternatives. For 5
 arguments or less, it's the most efficient implementation, and nowhere in
 the current codebase is there a place where more than 5 strings are used...
 to my knowledge :-) There's really nothing wrong with it, IMO :-)
 
 Best regards,
 
 /Pär
 
 2014-10-23 22:01 GMT+02:00 Tim Rühsen tim.rueh...@gmx.de:
  Am Dienstag, 21. Oktober 2014, 16:49:12 schrieb Yousong Zhou:
   On 21 October 2014 16:17, Pär Karlsson feino...@gmail.com wrote:
Yes, you are right, of course. Looking through the original
  
  implementation
  
again, it seems water tight. clang probably complains 

Re: [Bug-wget] [PATCH] Small fix for limited number of strings (and potential garbage value) in arguments to concat_strings

2014-10-23 Thread Tim Rühsen
Am Dienstag, 21. Oktober 2014, 16:49:12 schrieb Yousong Zhou:
 On 21 October 2014 16:17, Pär Karlsson feino...@gmail.com wrote:
  Yes, you are right, of course. Looking through the original implementation
  again, it seems water tight. clang probably complains about the
  uninitialized values above argcount in saved_lengths[], that are never
  reached.
  
  The precalculated strlen:s saved is likely only an optimization(?)
  attempt,
  I suppose.
 
 Yes. Grepping through the code shows that currently there is no
 invocation of concat_strings() having more than 5 arguments.
 
  Still, it seems wasteful to set up two complete loops with va_arg, and
  considering what this function actually does, I wonder if not s(n)printf
  should be used instead of this function? :-)
 
 I think concat_strings() is more tight and readable than multiple
 strlen() + malloc() + snprintf().
 
 Regards.
 
yousong

Reading the answers here tells me, something is wrong with this function. 
There is misunderstanding / misinterpretation regarding the code.
Not only the developers here are affected, but also clang (version 3.3 upto 
3.6).

Here is my approach - introducing a helper function (strlcpy, which exists in 
BSD for a long time). It seems very straight forward to me.

How you like it ?


char *
concat_strings (const char *str0, ...)
{
  va_list args;
  const char *arg;
  size_t length = 0, pos = 0;
  char *s;

  if (!str0)
return NULL;

  va_start (args, str0);
  for (arg = str0; arg; arg = va_arg (args, const char *))
length += strlen(arg);
  va_end (args);

  s = xmalloc (length + 1);

  va_start (args, str0);
  for (arg = str0; arg; arg = va_arg (args, const char *))
pos += strlcpy(s + pos, arg, length - pos + 1);
  va_end (args);

  return s;
}


strlcpy() can often be used as replacement for
  len = snprintf(buf,%s,string);
but is ways faster.

FYI, my strlcpy() code is


#ifndef HAVE_STRLCPY
size_t
strlcpy (char *dst, const char *src, size_t size)
{
  const char *old = src;

  /* Copy as many bytes as will fit */
  if (size)
{
  while (--size)
{
  if (!(*dst++ = *src++))
return src - old - 1;
}

  *dst = 0;
}

  while (*src++);
  return src - old - 1;
}
#endif


BTW, I already tested this in my local copy of Wget. Passes all tests (as 
expected ;-)

Tim


signature.asc
Description: This is a digitally signed message part.


Re: [Bug-wget] [PATCH] Small fix for limited number of strings (and potential garbage value) in arguments to concat_strings

2014-10-21 Thread Pär Karlsson
Yes, you are right, of course. Looking through the original implementation
again, it seems water tight. clang probably complains about the
uninitialized values above argcount in saved_lengths[], that are never
reached.

The precalculated strlen:s saved is likely only an optimization(?) attempt,
I suppose.

Still, it seems wasteful to set up two complete loops with va_arg, and
considering what this function actually does, I wonder if not s(n)printf
should be used instead of this function? :-)

Best regards,

/Pär

2014-10-21 4:22 GMT+02:00 Yousong Zhou yszhou4t...@gmail.com:

 Hi, Pär

 On 17 October 2014 03:50, Pär Karlsson feino...@gmail.com wrote:
  Hi, I fould a potential gotcha when playing with clang's code analysis
 tool.
 
  The concat_strings function silently stopped counting string lengths when
  given more than 5 arguments. clang warned about potential garbage values
 in
  the saved_lengths array, so I redid it with this approach.

 After taking a closer look, I guess the old implementation is fine.
 saved_length[] is used as a buffer for lengths of the first 5
 arguments and there is a bound check with its length.  Maybe it's a
 false-positive from clang tool?

 Sorry for the noise...

 Regards.

 yousong

 
  All tests working ok with this patch.
 
  This is my first patch to this list, by the way. I'd be happy to help out
  more in the future.
 
  Best regards,
 
  /Pär Karlsson, Sweden
 
  
 
  commit 2d855670e0e1fbe578506b376cdd40b0e465d3ef
  Author: Pär Karlsson feino...@gmail.com
  Date:   Thu Oct 16 21:41:36 2014 +0200
 
  Updated ChangeLog
 
  diff --git a/src/ChangeLog b/src/ChangeLog
  index 1c4e2d5..1e39475 100644
  --- a/src/ChangeLog
  +++ b/src/ChangeLog
  @@ -1,3 +1,8 @@
  +2014-10-16  Pär Karlsson  feino...@gmail.com
  +
  +   * utils.c (concat_strings): fixed arbitrary limit of 5 arguments
 to
  +   function
  +
   2014-05-03  Tim Ruehsen  tim.rueh...@gmx.de
 
  * retr.c (retrieve_url): fixed memory leak
 
  commit 1fa9ff274dcb6e5a2dbbbc7d3fe2f139059c47f1
  Author: Pär Karlsson feino...@gmail.com
  Date:   Wed Oct 15 00:00:31 2014 +0200
 
  Generalized concat_strings argument length
 
The concat_strings function seemed arbitrary to only accept a
 maximum
of 5 arguments (the others were silently ignored).
 
Also it had a potential garbage read of the values in the array.
Updated with xmalloc/xrealloc/free
 
  diff --git a/src/utils.c b/src/utils.c
  index 78c282e..93c9ddc 100644
  --- a/src/utils.c
  +++ b/src/utils.c
  @@ -356,7 +356,8 @@ char *
   concat_strings (const char *str0, ...)
   {
 va_list args;
  -  int saved_lengths[5]; /* inspired by Apache's apr_pstrcat */
  +  size_t psize = sizeof(int);
  +  int *saved_lengths = xmalloc (psize);
 char *ret, *p;
 
 const char *next_str;
  @@ -370,8 +371,8 @@ concat_strings (const char *str0, ...)
 for (next_str = str0; next_str != NULL; next_str = va_arg (args, char
 *))
   {
 int len = strlen (next_str);
  -  if (argcount  countof (saved_lengths))
  -saved_lengths[argcount++] = len;
  +  saved_lengths[argcount++] = len;
  +  xrealloc(saved_lengths, psize * argcount);
 total_length += len;
   }
 va_end (args);
  @@ -393,7 +394,7 @@ concat_strings (const char *str0, ...)
   }
 va_end (args);
 *p = '\0';
  -
  +  free(saved_lengths);
 return ret;
   }
   ^L



Re: [Bug-wget] [PATCH] Small fix for limited number of strings (and potential garbage value) in arguments to concat_strings

2014-10-21 Thread Yousong Zhou
On 21 October 2014 16:17, Pär Karlsson feino...@gmail.com wrote:
 Yes, you are right, of course. Looking through the original implementation
 again, it seems water tight. clang probably complains about the
 uninitialized values above argcount in saved_lengths[], that are never
 reached.

 The precalculated strlen:s saved is likely only an optimization(?) attempt,
 I suppose.

Yes. Grepping through the code shows that currently there is no
invocation of concat_strings() having more than 5 arguments.


 Still, it seems wasteful to set up two complete loops with va_arg, and
 considering what this function actually does, I wonder if not s(n)printf
 should be used instead of this function? :-)

I think concat_strings() is more tight and readable than multiple
strlen() + malloc() + snprintf().

Regards.

   yousong



Re: [Bug-wget] [PATCH] Small fix for limited number of strings (and potential garbage value) in arguments to concat_strings

2014-10-21 Thread Micah Cowan
On Mon, Oct 20, 2014 at 7:02 PM, Yousong Zhou yszhou4t...@gmail.com wrote:
 I am not sure here.  Do we always assume sizeof(char) to be 1 for
 platforms supported by wget?

FWIW, sizeof(char) is always 1 by definition; the C standard
guarantees it. Even on systems with no addressable values smaller than
16 bits, because on such systems, C defines a byte to be 16 bits
(recall that, originally at least, a byte isn't necessarily an octet,
and that's the meaning the C standards use).

If a platform doesn't have sizeof(char) == 1, it's not the C language. :)

-mjc



Re: [Bug-wget] [PATCH] Small fix for limited number of strings (and potential garbage value) in arguments to concat_strings

2014-10-21 Thread Pär Karlsson
Thanks!

It didn't even occur to me to check this out. My only excuse is gratuitous
consistency and lack of pure C experience; a malloc() without a
corresponding sizeof() seemed a little arbitrary to me, but it makes sense
now :-)

/Pär

2014-10-21 17:46 GMT+02:00 Micah Cowan mi...@addictivecode.org:

 On Mon, Oct 20, 2014 at 7:02 PM, Yousong Zhou yszhou4t...@gmail.com
 wrote:
  I am not sure here.  Do we always assume sizeof(char) to be 1 for
  platforms supported by wget?

 FWIW, sizeof(char) is always 1 by definition; the C standard
 guarantees it. Even on systems with no addressable values smaller than
 16 bits, because on such systems, C defines a byte to be 16 bits
 (recall that, originally at least, a byte isn't necessarily an octet,
 and that's the meaning the C standards use).

 If a platform doesn't have sizeof(char) == 1, it's not the C language. :)

 -mjc



Re: [Bug-wget] [PATCH] Small fix for limited number of strings (and potential garbage value) in arguments to concat_strings

2014-10-21 Thread Micah Cowan
On Tue, Oct 21, 2014 at 8:55 AM, Pär Karlsson feino...@gmail.com wrote:
 Thanks!

 It didn't even occur to me to check this out. My only excuse is gratuitous
 consistency and lack of pure C experience; a malloc() without a
 corresponding sizeof() seemed a little arbitrary to me, but it makes sense
 now :-)

Well, I'm always happy to see someone being more careful than
necessary, than the other direction. ;)

-mjc



Re: [Bug-wget] [PATCH] Small fix for limited number of strings (and potential garbage value) in arguments to concat_strings

2014-10-20 Thread Pär Karlsson
Thank you for your feedback and suggestions.

I thought about this during the weekend and figured it could be done much
more efficiently, by only looping through the arguments once. Also, I
realized the the original version would have segfaulted if called with more
than 5 args, since the destination memory never got allocated before the
memcpy (in the current code, it never is, though!).

I cleaned up the code according to the GNU coding standards as well. The
test suite rolls with this, and I think it looks better (although the
function is only really called in a handful of places in all of wget).

Best regards,

/Pär

Patch below:

diff --git a/src/ChangeLog b/src/ChangeLog
index d5aeca0..87abd85 100644
--- a/src/ChangeLog
+++ b/src/ChangeLog
@@ -1,3 +1,8 @@
+2014-10-20 Pär Karlsson  feino...@gmail.com
+
+   * utils.c (concat_strings): got rid of double loop, cleaned up
potential
+   memory corruption if concat_strings was called with more than five
args
+
 2014-10-16  Tim Ruehsen  tim.rueh...@gmx.de

* url.c (url_parse): little code cleanup
diff --git a/src/utils.c b/src/utils.c
index 78c282e..dbeb9fe 100644
--- a/src/utils.c
+++ b/src/utils.c
@@ -356,42 +356,36 @@ char *
 concat_strings (const char *str0, ...)
 {
   va_list args;
-  int saved_lengths[5]; /* inspired by Apache's apr_pstrcat */
   char *ret, *p;

   const char *next_str;
-  int total_length = 0;
-  size_t argcount;
+  size_t len;
+  size_t total_length = 0;
+  size_t charsize = sizeof (char);
+  size_t chunksize = 64;
+  size_t bufsize = 64;
+
+  p = ret = xmalloc (charsize * bufsize);

   /* Calculate the length of and allocate the resulting string. */

-  argcount = 0;
   va_start (args, str0);
   for (next_str = str0; next_str != NULL; next_str = va_arg (args, char *))
 {
-  int len = strlen (next_str);
-  if (argcount  countof (saved_lengths))
-saved_lengths[argcount++] = len;
+  len = strlen (next_str);
+  if (len == 0) {
+  continue;
+  }
   total_length += len;
-}
-  va_end (args);
-  p = ret = xmalloc (total_length + 1);
-
-  /* Copy the strings into the allocated space. */
-
-  argcount = 0;
-  va_start (args, str0);
-  for (next_str = str0; next_str != NULL; next_str = va_arg (args, char *))
-{
-  int len;
-  if (argcount  countof (saved_lengths))
-len = saved_lengths[argcount++];
-  else
-len = strlen (next_str);
+  if (total_length  bufsize) {
+  bufsize += chunksize;
+  ret = xrealloc (ret, charsize * bufsize);
+  }
   memcpy (p, next_str, len);
   p += len;
 }
   va_end (args);
+  ret = xrealloc (ret, charsize * total_length + 1);
   *p = '\0';

   return ret;



2014-10-19 16:16 GMT+02:00 Giuseppe Scrivano gscriv...@gnu.org:

 Pär Karlsson feino...@gmail.com writes:

  Hi, I fould a potential gotcha when playing with clang's code analysis
 tool.
 
  The concat_strings function silently stopped counting string lengths when
  given more than 5 arguments. clang warned about potential garbage values
 in
  the saved_lengths array, so I redid it with this approach.
 
  All tests working ok with this patch.

 thanks for your contribution.  I've just few comments:


  commit 2d855670e0e1fbe578506b376cdd40b0e465d3ef
  Author: Pär Karlsson feino...@gmail.com
  Date:   Thu Oct 16 21:41:36 2014 +0200
 
  Updated ChangeLog
 

 we usually update the changelog in the same commit that made the change,
 so please squash these two commits into one.

 Also, it doesn't apply on current git master, as it seems to be based on
 a old version of git from the ChangeLog file context, could you rebase
 onto the master branch?


  diff --git a/src/utils.c b/src/utils.c
  index 78c282e..93c9ddc 100644
  --- a/src/utils.c
  +++ b/src/utils.c
  @@ -356,7 +356,8 @@ char *
   concat_strings (const char *str0, ...)
   {
 va_list args;
  -  int saved_lengths[5]; /* inspired by Apache's apr_pstrcat */
  +  size_t psize = sizeof(int);

 please leave a space between sizeof and '(' as mandated by the GNU
 coding standards.

  +  int *saved_lengths = xmalloc (psize);
 char *ret, *p;
 
 const char *next_str;
  @@ -370,8 +371,8 @@ concat_strings (const char *str0, ...)
 for (next_str = str0; next_str != NULL; next_str = va_arg (args, char
 *))
   {
 int len = strlen (next_str);
  -  if (argcount  countof (saved_lengths))
  -saved_lengths[argcount++] = len;
  +  saved_lengths[argcount++] = len;
  +  xrealloc(saved_lengths, psize * argcount);

 same here.

 total_length += len;
   }
 va_end (args);
  @@ -393,7 +394,7 @@ concat_strings (const char *str0, ...)
   }
 va_end (args);
 *p = '\0';
  -
  +  free(saved_lengths);

 and here.

 Regards,
 Giuseppe



Re: [Bug-wget] [PATCH] Small fix for limited number of strings (and potential garbage value) in arguments to concat_strings

2014-10-20 Thread Pär Karlsson
Whoops, I realised I failed on the GNU coding standards, please disregard
the last one; the patch below should be better.

My apologies :-/

/Pär

diff --git a/src/ChangeLog b/src/ChangeLog
index d5aeca0..87abd85 100644
--- a/src/ChangeLog
+++ b/src/ChangeLog
@@ -1,3 +1,8 @@
+2014-10-20 Pär Karlsson  feino...@gmail.com
+
+   * utils.c (concat_strings): got rid of double loop, cleaned up
potential
+   memory corruption if concat_strings was called with more than five
args
+
 2014-10-16  Tim Ruehsen  tim.rueh...@gmx.de

* url.c (url_parse): little code cleanup
diff --git a/src/utils.c b/src/utils.c
index 78c282e..5f359e0 100644
--- a/src/utils.c
+++ b/src/utils.c
@@ -356,42 +356,36 @@ char *
 concat_strings (const char *str0, ...)
 {
   va_list args;
-  int saved_lengths[5]; /* inspired by Apache's apr_pstrcat */
   char *ret, *p;

   const char *next_str;
-  int total_length = 0;
-  size_t argcount;
+  size_t len;
+  size_t total_length = 0;
+  size_t charsize = sizeof (char);
+  size_t chunksize = 64;
+  size_t bufsize = 64;
+
+  p = ret = xmalloc (charsize * bufsize);

   /* Calculate the length of and allocate the resulting string. */

-  argcount = 0;
   va_start (args, str0);
   for (next_str = str0; next_str != NULL; next_str = va_arg (args, char *))
 {
-  int len = strlen (next_str);
-  if (argcount  countof (saved_lengths))
-saved_lengths[argcount++] = len;
+  len = strlen (next_str);
+  if (len == 0)
+continue;
   total_length += len;
-}
-  va_end (args);
-  p = ret = xmalloc (total_length + 1);
-
-  /* Copy the strings into the allocated space. */
-
-  argcount = 0;
-  va_start (args, str0);
-  for (next_str = str0; next_str != NULL; next_str = va_arg (args, char *))
-{
-  int len;
-  if (argcount  countof (saved_lengths))
-len = saved_lengths[argcount++];
-  else
-len = strlen (next_str);
+  if (total_length  bufsize)
+  {
+bufsize += chunksize;
+ret = xrealloc (ret, charsize * bufsize);
+  }
   memcpy (p, next_str, len);
   p += len;
 }
   va_end (args);
+  ret = xrealloc (ret, charsize * total_length + 1);
   *p = '\0';

   return ret;

2014-10-20 22:14 GMT+02:00 Pär Karlsson feino...@gmail.com:

 Thank you for your feedback and suggestions.

 I thought about this during the weekend and figured it could be done much
 more efficiently, by only looping through the arguments once. Also, I
 realized the the original version would have segfaulted if called with more
 than 5 args, since the destination memory never got allocated before the
 memcpy (in the current code, it never is, though!).

 I cleaned up the code according to the GNU coding standards as well. The
 test suite rolls with this, and I think it looks better (although the
 function is only really called in a handful of places in all of wget).

 Best regards,

 /Pär

 Patch below:

 diff --git a/src/ChangeLog b/src/ChangeLog
 index d5aeca0..87abd85 100644
 --- a/src/ChangeLog
 +++ b/src/ChangeLog
 @@ -1,3 +1,8 @@
 +2014-10-20 Pär Karlsson  feino...@gmail.com
 +
 +   * utils.c (concat_strings): got rid of double loop, cleaned up
 potential
 +   memory corruption if concat_strings was called with more than five
 args
 +
  2014-10-16  Tim Ruehsen  tim.rueh...@gmx.de

 * url.c (url_parse): little code cleanup
 diff --git a/src/utils.c b/src/utils.c
 index 78c282e..dbeb9fe 100644
 --- a/src/utils.c
 +++ b/src/utils.c
 @@ -356,42 +356,36 @@ char *
  concat_strings (const char *str0, ...)
  {
va_list args;
 -  int saved_lengths[5]; /* inspired by Apache's apr_pstrcat */
char *ret, *p;

const char *next_str;
 -  int total_length = 0;
 -  size_t argcount;
 +  size_t len;
 +  size_t total_length = 0;
 +  size_t charsize = sizeof (char);
 +  size_t chunksize = 64;
 +  size_t bufsize = 64;
 +
 +  p = ret = xmalloc (charsize * bufsize);

/* Calculate the length of and allocate the resulting string. */

 -  argcount = 0;
va_start (args, str0);
for (next_str = str0; next_str != NULL; next_str = va_arg (args, char
 *))
  {
 -  int len = strlen (next_str);
 -  if (argcount  countof (saved_lengths))
 -saved_lengths[argcount++] = len;
 +  len = strlen (next_str);
 +  if (len == 0) {
 +  continue;
 +  }
total_length += len;
 -}
 -  va_end (args);
 -  p = ret = xmalloc (total_length + 1);
 -
 -  /* Copy the strings into the allocated space. */
 -
 -  argcount = 0;
 -  va_start (args, str0);
 -  for (next_str = str0; next_str != NULL; next_str = va_arg (args, char
 *))
 -{
 -  int len;
 -  if (argcount  countof (saved_lengths))
 -len = saved_lengths[argcount++];
 -  else
 -len = strlen (next_str);
 +  if (total_length  bufsize) {
 +  bufsize += chunksize;
 +  ret = xrealloc (ret, charsize * bufsize);
 +  }
memcpy (p, next_str, len);
p += len;
   

Re: [Bug-wget] [PATCH] Small fix for limited number of strings (and potential garbage value) in arguments to concat_strings

2014-10-20 Thread Yousong Zhou
Hi, Pär.  I got a few comments inline.

On 21 October 2014 05:47, Pär Karlsson feino...@gmail.com wrote:
 Whoops, I realised I failed on the GNU coding standards, please disregard
 the last one; the patch below should be better.

 My apologies :-/

 /Pär

 diff --git a/src/ChangeLog b/src/ChangeLog
 index d5aeca0..87abd85 100644
 --- a/src/ChangeLog
 +++ b/src/ChangeLog
 @@ -1,3 +1,8 @@
 +2014-10-20 Pär Karlsson  feino...@gmail.com
 +
 +   * utils.c (concat_strings): got rid of double loop, cleaned up
 potential
 +   memory corruption if concat_strings was called with more than five
 args
 +
  2014-10-16  Tim Ruehsen  tim.rueh...@gmx.de

 * url.c (url_parse): little code cleanup
 diff --git a/src/utils.c b/src/utils.c
 index 78c282e..5f359e0 100644
 --- a/src/utils.c
 +++ b/src/utils.c
 @@ -356,42 +356,36 @@ char *
  concat_strings (const char *str0, ...)
  {
va_list args;
 -  int saved_lengths[5]; /* inspired by Apache's apr_pstrcat */
char *ret, *p;

const char *next_str;
 -  int total_length = 0;
 -  size_t argcount;
 +  size_t len;
 +  size_t total_length = 0;
 +  size_t charsize = sizeof (char);

I am not sure here.  Do we always assume sizeof(char) to be 1 for
platforms supported by wget?

 +  size_t chunksize = 64;
 +  size_t bufsize = 64;
 +
 +  p = ret = xmalloc (charsize * bufsize);

/* Calculate the length of and allocate the resulting string. */

 -  argcount = 0;
va_start (args, str0);
for (next_str = str0; next_str != NULL; next_str = va_arg (args, char *))
  {
 -  int len = strlen (next_str);
 -  if (argcount  countof (saved_lengths))
 -saved_lengths[argcount++] = len;
 +  len = strlen (next_str);
 +  if (len == 0)
 +continue;
total_length += len;
 -}
 -  va_end (args);
 -  p = ret = xmalloc (total_length + 1);
 -
 -  /* Copy the strings into the allocated space. */
 -
 -  argcount = 0;
 -  va_start (args, str0);
 -  for (next_str = str0; next_str != NULL; next_str = va_arg (args, char *))
 -{
 -  int len;
 -  if (argcount  countof (saved_lengths))
 -len = saved_lengths[argcount++];
 -  else
 -len = strlen (next_str);
 +  if (total_length  bufsize)
 +  {
 +bufsize += chunksize;

Should be `bufsize = total_length` ?

 +ret = xrealloc (ret, charsize * bufsize);
 +  }
memcpy (p, next_str, len);

Xrealloc may return a new block different from p, so memcpy(p, ...)
may not be what you want.

p += len;
  }
va_end (args);
 +  ret = xrealloc (ret, charsize * total_length + 1);
*p = '\0';

Malloc takes time.  How about counting total_length in one loop and
doing the copy in another?

Regards.

yousong


return ret;




Re: [Bug-wget] [PATCH] Small fix for limited number of strings (and potential garbage value) in arguments to concat_strings

2014-10-20 Thread Yousong Zhou
On 21 October 2014 10:02, Yousong Zhou yszhou4t...@gmail.com wrote:
 Hi, Pär.  I got a few comments inline.

 On 21 October 2014 05:47, Pär Karlsson feino...@gmail.com wrote:
 Whoops, I realised I failed on the GNU coding standards, please disregard
 the last one; the patch below should be better.

 My apologies :-/

 /Pär

 diff --git a/src/ChangeLog b/src/ChangeLog
 index d5aeca0..87abd85 100644
 --- a/src/ChangeLog
 +++ b/src/ChangeLog
 @@ -1,3 +1,8 @@
 +2014-10-20 Pär Karlsson  feino...@gmail.com
 +
 +   * utils.c (concat_strings): got rid of double loop, cleaned up
 potential
 +   memory corruption if concat_strings was called with more than five
 args
 +
  2014-10-16  Tim Ruehsen  tim.rueh...@gmx.de

 * url.c (url_parse): little code cleanup
 diff --git a/src/utils.c b/src/utils.c
 index 78c282e..5f359e0 100644
 --- a/src/utils.c
 +++ b/src/utils.c
 @@ -356,42 +356,36 @@ char *
  concat_strings (const char *str0, ...)
  {
va_list args;
 -  int saved_lengths[5]; /* inspired by Apache's apr_pstrcat */
char *ret, *p;

const char *next_str;
 -  int total_length = 0;
 -  size_t argcount;
 +  size_t len;
 +  size_t total_length = 0;
 +  size_t charsize = sizeof (char);

 I am not sure here.  Do we always assume sizeof(char) to be 1 for
 platforms supported by wget?

 +  size_t chunksize = 64;
 +  size_t bufsize = 64;
 +
 +  p = ret = xmalloc (charsize * bufsize);

/* Calculate the length of and allocate the resulting string. */

 -  argcount = 0;
va_start (args, str0);
for (next_str = str0; next_str != NULL; next_str = va_arg (args, char *))
  {
 -  int len = strlen (next_str);
 -  if (argcount  countof (saved_lengths))
 -saved_lengths[argcount++] = len;
 +  len = strlen (next_str);
 +  if (len == 0)
 +continue;
total_length += len;
 -}
 -  va_end (args);
 -  p = ret = xmalloc (total_length + 1);
 -
 -  /* Copy the strings into the allocated space. */
 -
 -  argcount = 0;
 -  va_start (args, str0);
 -  for (next_str = str0; next_str != NULL; next_str = va_arg (args, char *))
 -{
 -  int len;
 -  if (argcount  countof (saved_lengths))
 -len = saved_lengths[argcount++];
 -  else
 -len = strlen (next_str);
 +  if (total_length  bufsize)
 +  {
 +bufsize += chunksize;

 Should be `bufsize = total_length` ?

 +ret = xrealloc (ret, charsize * bufsize);
 +  }
memcpy (p, next_str, len);

 Xrealloc may return a new block different from p, so memcpy(p, ...)
 may not be what you want.

p += len;
  }
va_end (args);
 +  ret = xrealloc (ret, charsize * total_length + 1);
*p = '\0';

 Malloc takes time.  How about counting total_length in one loop and
 doing the copy in another?

I mean, we can skip the strlen part and just do strcpy in the second
loop as we already know we have enough space in the dest buffer for
all those null-terminated arguments.

 yousong



Re: [Bug-wget] [PATCH] Small fix for limited number of strings (and potential garbage value) in arguments to concat_strings

2014-10-20 Thread Yousong Zhou
Hi, Pär

On 17 October 2014 03:50, Pär Karlsson feino...@gmail.com wrote:
 Hi, I fould a potential gotcha when playing with clang's code analysis tool.

 The concat_strings function silently stopped counting string lengths when
 given more than 5 arguments. clang warned about potential garbage values in
 the saved_lengths array, so I redid it with this approach.

After taking a closer look, I guess the old implementation is fine.
saved_length[] is used as a buffer for lengths of the first 5
arguments and there is a bound check with its length.  Maybe it's a
false-positive from clang tool?

Sorry for the noise...

Regards.

yousong


 All tests working ok with this patch.

 This is my first patch to this list, by the way. I'd be happy to help out
 more in the future.

 Best regards,

 /Pär Karlsson, Sweden

 

 commit 2d855670e0e1fbe578506b376cdd40b0e465d3ef
 Author: Pär Karlsson feino...@gmail.com
 Date:   Thu Oct 16 21:41:36 2014 +0200

 Updated ChangeLog

 diff --git a/src/ChangeLog b/src/ChangeLog
 index 1c4e2d5..1e39475 100644
 --- a/src/ChangeLog
 +++ b/src/ChangeLog
 @@ -1,3 +1,8 @@
 +2014-10-16  Pär Karlsson  feino...@gmail.com
 +
 +   * utils.c (concat_strings): fixed arbitrary limit of 5 arguments to
 +   function
 +
  2014-05-03  Tim Ruehsen  tim.rueh...@gmx.de

 * retr.c (retrieve_url): fixed memory leak

 commit 1fa9ff274dcb6e5a2dbbbc7d3fe2f139059c47f1
 Author: Pär Karlsson feino...@gmail.com
 Date:   Wed Oct 15 00:00:31 2014 +0200

 Generalized concat_strings argument length

   The concat_strings function seemed arbitrary to only accept a maximum
   of 5 arguments (the others were silently ignored).

   Also it had a potential garbage read of the values in the array.
   Updated with xmalloc/xrealloc/free

 diff --git a/src/utils.c b/src/utils.c
 index 78c282e..93c9ddc 100644
 --- a/src/utils.c
 +++ b/src/utils.c
 @@ -356,7 +356,8 @@ char *
  concat_strings (const char *str0, ...)
  {
va_list args;
 -  int saved_lengths[5]; /* inspired by Apache's apr_pstrcat */
 +  size_t psize = sizeof(int);
 +  int *saved_lengths = xmalloc (psize);
char *ret, *p;

const char *next_str;
 @@ -370,8 +371,8 @@ concat_strings (const char *str0, ...)
for (next_str = str0; next_str != NULL; next_str = va_arg (args, char *))
  {
int len = strlen (next_str);
 -  if (argcount  countof (saved_lengths))
 -saved_lengths[argcount++] = len;
 +  saved_lengths[argcount++] = len;
 +  xrealloc(saved_lengths, psize * argcount);
total_length += len;
  }
va_end (args);
 @@ -393,7 +394,7 @@ concat_strings (const char *str0, ...)
  }
va_end (args);
*p = '\0';
 -
 +  free(saved_lengths);
return ret;
  }
  ^L



Re: [Bug-wget] [PATCH] Small fix for limited number of strings (and potential garbage value) in arguments to concat_strings

2014-10-19 Thread Giuseppe Scrivano
Pär Karlsson feino...@gmail.com writes:

 Hi, I fould a potential gotcha when playing with clang's code analysis tool.

 The concat_strings function silently stopped counting string lengths when
 given more than 5 arguments. clang warned about potential garbage values in
 the saved_lengths array, so I redid it with this approach.

 All tests working ok with this patch.

thanks for your contribution.  I've just few comments:


 commit 2d855670e0e1fbe578506b376cdd40b0e465d3ef
 Author: Pär Karlsson feino...@gmail.com
 Date:   Thu Oct 16 21:41:36 2014 +0200

 Updated ChangeLog


we usually update the changelog in the same commit that made the change,
so please squash these two commits into one.

Also, it doesn't apply on current git master, as it seems to be based on
a old version of git from the ChangeLog file context, could you rebase
onto the master branch?


 diff --git a/src/utils.c b/src/utils.c
 index 78c282e..93c9ddc 100644
 --- a/src/utils.c
 +++ b/src/utils.c
 @@ -356,7 +356,8 @@ char *
  concat_strings (const char *str0, ...)
  {
va_list args;
 -  int saved_lengths[5]; /* inspired by Apache's apr_pstrcat */
 +  size_t psize = sizeof(int);

please leave a space between sizeof and '(' as mandated by the GNU
coding standards.

 +  int *saved_lengths = xmalloc (psize);
char *ret, *p;

const char *next_str;
 @@ -370,8 +371,8 @@ concat_strings (const char *str0, ...)
for (next_str = str0; next_str != NULL; next_str = va_arg (args, char *))
  {
int len = strlen (next_str);
 -  if (argcount  countof (saved_lengths))
 -saved_lengths[argcount++] = len;
 +  saved_lengths[argcount++] = len;
 +  xrealloc(saved_lengths, psize * argcount);

same here.

total_length += len;
  }
va_end (args);
 @@ -393,7 +394,7 @@ concat_strings (const char *str0, ...)
  }
va_end (args);
*p = '\0';
 -
 +  free(saved_lengths);

and here.

Regards,
Giuseppe



[Bug-wget] [PATCH] Small fix for limited number of strings (and potential garbage value) in arguments to concat_strings

2014-10-16 Thread Pär Karlsson
Hi, I fould a potential gotcha when playing with clang's code analysis tool.

The concat_strings function silently stopped counting string lengths when
given more than 5 arguments. clang warned about potential garbage values in
the saved_lengths array, so I redid it with this approach.

All tests working ok with this patch.

This is my first patch to this list, by the way. I'd be happy to help out
more in the future.

Best regards,

/Pär Karlsson, Sweden



commit 2d855670e0e1fbe578506b376cdd40b0e465d3ef
Author: Pär Karlsson feino...@gmail.com
Date:   Thu Oct 16 21:41:36 2014 +0200

Updated ChangeLog

diff --git a/src/ChangeLog b/src/ChangeLog
index 1c4e2d5..1e39475 100644
--- a/src/ChangeLog
+++ b/src/ChangeLog
@@ -1,3 +1,8 @@
+2014-10-16  Pär Karlsson  feino...@gmail.com
+
+   * utils.c (concat_strings): fixed arbitrary limit of 5 arguments to
+   function
+
 2014-05-03  Tim Ruehsen  tim.rueh...@gmx.de

* retr.c (retrieve_url): fixed memory leak

commit 1fa9ff274dcb6e5a2dbbbc7d3fe2f139059c47f1
Author: Pär Karlsson feino...@gmail.com
Date:   Wed Oct 15 00:00:31 2014 +0200

Generalized concat_strings argument length

  The concat_strings function seemed arbitrary to only accept a maximum
  of 5 arguments (the others were silently ignored).

  Also it had a potential garbage read of the values in the array.
  Updated with xmalloc/xrealloc/free

diff --git a/src/utils.c b/src/utils.c
index 78c282e..93c9ddc 100644
--- a/src/utils.c
+++ b/src/utils.c
@@ -356,7 +356,8 @@ char *
 concat_strings (const char *str0, ...)
 {
   va_list args;
-  int saved_lengths[5]; /* inspired by Apache's apr_pstrcat */
+  size_t psize = sizeof(int);
+  int *saved_lengths = xmalloc (psize);
   char *ret, *p;

   const char *next_str;
@@ -370,8 +371,8 @@ concat_strings (const char *str0, ...)
   for (next_str = str0; next_str != NULL; next_str = va_arg (args, char *))
 {
   int len = strlen (next_str);
-  if (argcount  countof (saved_lengths))
-saved_lengths[argcount++] = len;
+  saved_lengths[argcount++] = len;
+  xrealloc(saved_lengths, psize * argcount);
   total_length += len;
 }
   va_end (args);
@@ -393,7 +394,7 @@ concat_strings (const char *str0, ...)
 }
   va_end (args);
   *p = '\0';
-
+  free(saved_lengths);
   return ret;
 }
 ^L