Re: [PATCH 05/10] parse_integer: convert lib/

2015-05-04 Thread Alexey Dobriyan
On Mon, May 4, 2015 at 5:10 PM, Rasmus Villemoes
 wrote:
> On Sat, May 02 2015, Alexey Dobriyan  wrote:
>
>> match_number() needlessly allocates/duplicates memory,
>> parsing can be done straight from original string.
>
> I suppose that's true, but the patch doesn't seem to do anything about
> it? It's probably better to do in a separate cleanup anyway, but then
> maybe this belongs as a comment below ---.

Sure, it is just "raise eyebrow" moment.


>> @@ -126,38 +127,37 @@ EXPORT_SYMBOL(get_options);
>>
>>  unsigned long long memparse(const char *ptr, char **retptr)
>>  {
>> - char *endptr;   /* local pointer to end of parsed string */
>> + unsigned long long val;
>>
>> - unsigned long long ret = simple_strtoull(ptr, &endptr, 0);
>> -
>> - switch (*endptr) {
>> + ptr += parse_integer(ptr, 0, &val);
>
> This seems wrong. simple_strtoull used to "sanitize" the return value
> from the (old) _parse_integer, so that endptr still points into the
> given string. Unconditionally adding the result from parse_integer may
> make ptr point far before the actual string, into who-knows-what.

When converting I tried to preserve the amount of error checking done.
simple_strtoull() either
a) return 0 and not advance pointer, or
b) return something and advance pointer.

Current code just ignores error case, so do I.

>> --- a/lib/parser.c
>> +++ b/lib/parser.c
>> @@ -44,7 +44,7 @@ static int match_one(char *s, const char *p, substring_t 
>> args[])
>>   p = meta + 1;
>>
>>   if (isdigit(*p))
>> - len = simple_strtoul(p, (char **) &p, 10);
>> + p += parse_integer(p, 10, (unsigned int *)&len);
>
> Hm, I think passing cast expressions to parse_integer should be actively
> discouraged. While it might work in this case, some day somebody will
> copy-paste this to a place where the len variable doesn't have
> sizeof==4.

This cast to turn on unsigned (or signed) parsing is the only nontrivial place.

All I can say is that C programmer should be diligent about types.
Equivalent strtol() code has silent truncation. Equivalent code with any other
real function which accepts pointer has the same problem -- direct cast.

We have to live with it, I guess.

>> @@ -68,19 +68,21 @@ static int match_one(char *s, const char *p, substring_t 
>> args[])
>>   break;
>>   }
>>   case 'd':
>> - simple_strtol(s, &args[argc].to, 0);
>> + /* anonymous variable */
>> + len = parse_integer(s, 0, &(int []){0}[0]);
>>   goto num;
>>   case 'u':
>> - simple_strtoul(s, &args[argc].to, 0);
>> + len = parse_integer(s, 0, &(unsigned int []){0}[0]);
>>   goto num;
>>   case 'o':
>> - simple_strtoul(s, &args[argc].to, 8);
>> + len = parse_integer(s, 8, &(unsigned int []){0}[0]);
>>   goto num;
>>   case 'x':
>> - simple_strtoul(s, &args[argc].to, 16);
>> + len = parse_integer(s, 16, &(unsigned int []){0}[0]);
>
> I see that the commit log says "don't be scared", and the first of these
> even has a comment. But is there any reason to be that clever here? I
> see a couple of downsides:
>
> * gcc has to initialize some stack memory to 0, since it cannot know it
>   is only an output parameter.

Yes.

> * things like this usually consume an excessive amount of stack. I
>   haven't been able to try your patches, but a simplified version of the
>   above shows that gcc doesn't use the same stack slots for the various
>   anonymous variables.

Yes.

> * It's unreadable, despite the comment and the commit log.

> I suggest using the more obvious approach of declaring a union on the
> stack and pass the address of the appropriate member.

Union will work.

No that it matters, it's a filesystem option parsing code
which is executed rarely near the top of sys_mount(),
not exactly perfomance bottleneck.

It's a shame to lose such clever hack :-(
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH 05/10] parse_integer: convert lib/

2015-05-04 Thread Rasmus Villemoes
On Sat, May 02 2015, Alexey Dobriyan  wrote:

> match_number() needlessly allocates/duplicates memory,
> parsing can be done straight from original string.

I suppose that's true, but the patch doesn't seem to do anything about
it? It's probably better to do in a separate cleanup anyway, but then
maybe this belongs as a comment below ---.


> Signed-off-by: Alexey Dobriyan 
> ---
>
>  lib/cmdline.c |   36 ++--
>  lib/parser.c  |   29 -
>  lib/swiotlb.c |2 +-
>  3 files changed, 31 insertions(+), 36 deletions(-)
>
> --- a/lib/cmdline.c
> +++ b/lib/cmdline.c
> @@ -27,7 +27,7 @@ static int get_range(char **str, int *pint)
>   int x, inc_counter, upper_range;
>  
>   (*str)++;
> - upper_range = simple_strtol((*str), NULL, 0);
> + parse_integer(*str, 0, &upper_range);
>   inc_counter = upper_range - *pint;
>   for (x = *pint; x < upper_range; x++)
>   *pint++ = x;
> @@ -51,13 +51,14 @@ static int get_range(char **str, int *pint)
>  
>  int get_option(char **str, int *pint)
>  {
> - char *cur = *str;
> + int len;
>  
> - if (!cur || !(*cur))
> + if (!str || !*str)
>   return 0;
> - *pint = simple_strtol(cur, str, 0);
> - if (cur == *str)
> + len = parse_integer(*str, 0, pint);
> + if (len < 0)
>   return 0;
> + *str += len;
>   if (**str == ',') {
>   (*str)++;
>   return 2;
> @@ -126,38 +127,37 @@ EXPORT_SYMBOL(get_options);
>  
>  unsigned long long memparse(const char *ptr, char **retptr)
>  {
> - char *endptr;   /* local pointer to end of parsed string */
> + unsigned long long val;
>  
> - unsigned long long ret = simple_strtoull(ptr, &endptr, 0);
> -
> - switch (*endptr) {
> + ptr += parse_integer(ptr, 0, &val);

This seems wrong. simple_strtoull used to "sanitize" the return value
from the (old) _parse_integer, so that endptr still points into the
given string. Unconditionally adding the result from parse_integer may
make ptr point far before the actual string, into who-knows-what.

> + switch (*ptr) {
>   case 'E':
>   case 'e':
> - ret <<= 10;
> + val <<= 10;
>   case 'P':
>   case 'p':
> - ret <<= 10;
> + val <<= 10;
>   case 'T':
>   case 't':
> - ret <<= 10;
> + val <<= 10;
>   case 'G':
>   case 'g':
> - ret <<= 10;
> + val <<= 10;
>   case 'M':
>   case 'm':
> - ret <<= 10;
> + val <<= 10;
>   case 'K':
>   case 'k':
> - ret <<= 10;
> - endptr++;
> + val <<= 10;
> + ptr++;
>   default:
>   break;
>   }
>  
>   if (retptr)
> - *retptr = endptr;
> + *retptr = (char *)ptr;

And here we propagate that to the caller.

> - return ret;
> + return val;
>  }
>  EXPORT_SYMBOL(memparse);
>  
> --- a/lib/parser.c
> +++ b/lib/parser.c
> @@ -44,7 +44,7 @@ static int match_one(char *s, const char *p, substring_t 
> args[])
>   p = meta + 1;
>  
>   if (isdigit(*p))
> - len = simple_strtoul(p, (char **) &p, 10);
> + p += parse_integer(p, 10, (unsigned int *)&len);

Hm, I think passing cast expressions to parse_integer should be actively
discouraged. While it might work in this case, some day somebody will
copy-paste this to a place where the len variable doesn't have
sizeof==4. 

>   else if (*p == '%') {
>   if (*s++ != '%')
>   return 0;
> @@ -68,19 +68,21 @@ static int match_one(char *s, const char *p, substring_t 
> args[])
>   break;
>   }
>   case 'd':
> - simple_strtol(s, &args[argc].to, 0);
> + /* anonymous variable */
> + len = parse_integer(s, 0, &(int []){0}[0]);
>   goto num;
>   case 'u':
> - simple_strtoul(s, &args[argc].to, 0);
> + len = parse_integer(s, 0, &(unsigned int []){0}[0]);
>   goto num;
>   case 'o':
> - simple_strtoul(s, &args[argc].to, 8);
> + len = parse_integer(s, 8, &(unsigned int []){0}[0]);
>   goto num;
>   case 'x':
> - simple_strtoul(s, &args[argc].to, 16);
> + len = parse_integer(s, 16, &(unsigned int []){0}[0]);

I see that the commit log says "don't be scared", and the first of these
even has a comment. But is there any reason to be that clever here? I
see a couple of downsides:

* gcc has to initialize some stack memory to 0, since it cannot know it
  is only an output parameter.

* things like this usually consume an excessive amount of stack. I
  haven't been abl

[PATCH 05/10] parse_integer: convert lib/

2015-05-01 Thread Alexey Dobriyan
Convert away lib/ from deprecated simple_strto*() interfaces.

"&(int []){0}[0]" expression is anonymous variable, don't be scared.
Filesystem option parser code does parsing 1.5 times: first to know
boundaries of a value (args[0].from, args[0].to) and then actual
parsing with match_number(). Noody knows why it does that.

match_number() needlessly allocates/duplicates memory,
parsing can be done straight from original string.

Signed-off-by: Alexey Dobriyan 
---

 lib/cmdline.c |   36 ++--
 lib/parser.c  |   29 -
 lib/swiotlb.c |2 +-
 3 files changed, 31 insertions(+), 36 deletions(-)

--- a/lib/cmdline.c
+++ b/lib/cmdline.c
@@ -27,7 +27,7 @@ static int get_range(char **str, int *pint)
int x, inc_counter, upper_range;
 
(*str)++;
-   upper_range = simple_strtol((*str), NULL, 0);
+   parse_integer(*str, 0, &upper_range);
inc_counter = upper_range - *pint;
for (x = *pint; x < upper_range; x++)
*pint++ = x;
@@ -51,13 +51,14 @@ static int get_range(char **str, int *pint)
 
 int get_option(char **str, int *pint)
 {
-   char *cur = *str;
+   int len;
 
-   if (!cur || !(*cur))
+   if (!str || !*str)
return 0;
-   *pint = simple_strtol(cur, str, 0);
-   if (cur == *str)
+   len = parse_integer(*str, 0, pint);
+   if (len < 0)
return 0;
+   *str += len;
if (**str == ',') {
(*str)++;
return 2;
@@ -126,38 +127,37 @@ EXPORT_SYMBOL(get_options);
 
 unsigned long long memparse(const char *ptr, char **retptr)
 {
-   char *endptr;   /* local pointer to end of parsed string */
+   unsigned long long val;
 
-   unsigned long long ret = simple_strtoull(ptr, &endptr, 0);
-
-   switch (*endptr) {
+   ptr += parse_integer(ptr, 0, &val);
+   switch (*ptr) {
case 'E':
case 'e':
-   ret <<= 10;
+   val <<= 10;
case 'P':
case 'p':
-   ret <<= 10;
+   val <<= 10;
case 'T':
case 't':
-   ret <<= 10;
+   val <<= 10;
case 'G':
case 'g':
-   ret <<= 10;
+   val <<= 10;
case 'M':
case 'm':
-   ret <<= 10;
+   val <<= 10;
case 'K':
case 'k':
-   ret <<= 10;
-   endptr++;
+   val <<= 10;
+   ptr++;
default:
break;
}
 
if (retptr)
-   *retptr = endptr;
+   *retptr = (char *)ptr;
 
-   return ret;
+   return val;
 }
 EXPORT_SYMBOL(memparse);
 
--- a/lib/parser.c
+++ b/lib/parser.c
@@ -44,7 +44,7 @@ static int match_one(char *s, const char *p, substring_t 
args[])
p = meta + 1;
 
if (isdigit(*p))
-   len = simple_strtoul(p, (char **) &p, 10);
+   p += parse_integer(p, 10, (unsigned int *)&len);
else if (*p == '%') {
if (*s++ != '%')
return 0;
@@ -68,19 +68,21 @@ static int match_one(char *s, const char *p, substring_t 
args[])
break;
}
case 'd':
-   simple_strtol(s, &args[argc].to, 0);
+   /* anonymous variable */
+   len = parse_integer(s, 0, &(int []){0}[0]);
goto num;
case 'u':
-   simple_strtoul(s, &args[argc].to, 0);
+   len = parse_integer(s, 0, &(unsigned int []){0}[0]);
goto num;
case 'o':
-   simple_strtoul(s, &args[argc].to, 8);
+   len = parse_integer(s, 8, &(unsigned int []){0}[0]);
goto num;
case 'x':
-   simple_strtoul(s, &args[argc].to, 16);
+   len = parse_integer(s, 16, &(unsigned int []){0}[0]);
num:
-   if (args[argc].to == args[argc].from)
+   if (len < 0)
return 0;
+   args[argc].to = args[argc].from + len;
break;
default:
return 0;
@@ -127,10 +129,8 @@ EXPORT_SYMBOL(match_token);
  */
 static int match_number(substring_t *s, int *result, int base)
 {
-   char *endp;
char *buf;
int ret;
-   long val;
size_t len = s->to - s->from;
 
buf = kmalloc(len + 1, GFP_KERNEL);
@@ -139,16 +139,11 @@ static int match_number(substring_t *s, int *result, int 
base)
memcpy(buf, s->from, len);
buf[len] = '\0';
 
-   ret = 0;
-   val = simple_strtol(buf, &endp, base);
-   if (endp == buf)
-   ret = -EINVAL;
-   else if