Currently v*scanf functions are broken and crash when are called with more
than 30 arguments in va_list. This is because va_list v*scanf functions are
redirected to variadic *scanf functions and this redirect implemented in
scanf.S file has fixed limit for 30 arguments.

Number of arguments for msvcrt *scanf function can be determined from
format string by counting number of '%' characters which is the upper
limit. *scanf functions would not access more arguments than this number.
Every scanf parameter is pointer, it has fixed size and so upper stack size
limit can be exactly calculated.

Fix this scanf.S redirect implementation by dynamically allocating stack
for upper limit of pointer parameters.

---

I have tested this patch for i686 and x86_64. Both ARM (arm32 and aarch64)
changes are untested, so please test it if vsscanf() on these platforms
still works.

With this patch following code works fine without any crashing.
Compile for msvcrt with -std=c89 or -D__USE_MINGW_ANSI_STDIO=0

  #include <stdio.h>
  #include <stdarg.h>

  __attribute__((__format__(scanf, 2, 3)))
  static int call_vsscanf(const char *str, const char *format, ...)
  {
    int ret;
    va_list ap;
    va_start(ap, format);
    ret = vsscanf(str, format, ap);
    va_end(ap);
    return ret;
  }

  int main()
  {
    char b[51];
    call_vsscanf(
      "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXY",
      "%c%c%c%c%c%c%c%c%c%c%c%c%c%c%c%c%c%c%c%c%c%c%c%c%c%c"
      "%c%c%c%c%c%c%c%c%c%c%c%c%c%c%c%c%c%c%c%c%c%c%c%c%c",
      &b[0],&b[1],&b[2],&b[3],&b[4],&b[5],&b[6],&b[7],&b[8],&b[9],&b[10],
      &b[11],&b[12],&b[13],&b[14],&b[15],&b[16],&b[17],&b[18],&b[19],&b[20],
      &b[21],&b[22],&b[23],&b[24],&b[25],&b[26],&b[27],&b[28],&b[29],&b[30],
      &b[31],&b[32],&b[33],&b[34],&b[35],&b[36],&b[37],&b[38],&b[39],&b[40],
      &b[41],&b[42],&b[43],&b[44],&b[45],&b[46],&b[47],&b[48],&b[49],&b[50]
    );
    printf("b=%.51s\n", b);
    return 0;
  }

---

Changes in v2:
* Change test code to scan odd number of arguments
* Keep x86_64 stack 16byte aligned (use leaq + andq)
* Include all Martin's ARM assembler changes
* Rename __ms_*scanf_arg_count_internal to __ms_*scanf_max_arg_count_internal
  and change implementation to count only number of '%' characters
---
 mingw-w64-crt/Makefile.am                     |   2 +
 mingw-w64-crt/stdio/scanf.S                   | 177 ++++++++++--------
 mingw-w64-crt/stdio/scanf2-argcount-char.c    |   9 +
 .../stdio/scanf2-argcount-template.c          |  18 ++
 mingw-w64-crt/stdio/scanf2-argcount-wchar.c   |   9 +
 mingw-w64-crt/stdio/vfscanf.c                 |   6 +-
 mingw-w64-crt/stdio/vfwscanf.c                |   6 +-
 mingw-w64-crt/stdio/vsscanf.c                 |   6 +-
 mingw-w64-crt/stdio/vswscanf.c                |   6 +-
 9 files changed, 161 insertions(+), 78 deletions(-)
 create mode 100644 mingw-w64-crt/stdio/scanf2-argcount-char.c
 create mode 100644 mingw-w64-crt/stdio/scanf2-argcount-template.c
 create mode 100644 mingw-w64-crt/stdio/scanf2-argcount-wchar.c

diff --git a/mingw-w64-crt/Makefile.am b/mingw-w64-crt/Makefile.am
index 8996b65dc075..07531c12ddc2 100644
--- a/mingw-w64-crt/Makefile.am
+++ b/mingw-w64-crt/Makefile.am
@@ -494,6 +494,7 @@ src_libmingwex=\
   misc/wmemset.c         misc/ftw.c                 misc/ftw64.c            
misc/mingw-access.c          \
   \
   stdio/mingw_pformat.h    \
+  stdio/scanf2-argcount-char.c stdio/scanf2-argcount-wchar.c \
   stdio/vfscanf2.S         stdio/vfwscanf2.S         stdio/vscanf2.S          
stdio/vsscanf2.S          stdio/vswscanf2.S \
   stdio/vwscanf2.S         stdio/strtok_r.c          stdio/scanf.S \
   stdio/_Exit.c            stdio/_findfirst64i32.c   stdio/_findnext64i32.c   
stdio/_fstat.c \
@@ -2228,6 +2229,7 @@ EXTRA_DIST += revstamp.h \
   profile/gcrt0.c \
   profile/COPYING \
   profile/CYGWIN_LICENSE \
+  stdio/scanf2-argcount-template.c \
   stdio/scanf2-template.S
 
 DISTCHECK_CONFIGURE_FLAGS = --host=$(host_triplet) $(withsys)
diff --git a/mingw-w64-crt/stdio/scanf.S b/mingw-w64-crt/stdio/scanf.S
index 1e0bed9452ac..323a8d5a859d 100644
--- a/mingw-w64-crt/stdio/scanf.S
+++ b/mingw-w64-crt/stdio/scanf.S
@@ -9,17 +9,14 @@
    The goal of this routine is to turn a call to v*scanf into a call to
    s*scanf.  This is needed because mingw-w64 uses msvcr100.dll, which doesn't
    support the v*scanf functions instead of msvcr120.dll which does.
-   Unfortunately, there is no defined way to know exactly how big a va_list
-   is, so we use a hard-coded buffer.
-
-   I suppose a sufficiently-motivated person could try to parse the format
-   to figure out how many tokens there are... */
+*/
 
 /* The function prototype here is (essentially):
 
-   int __ms_vsscanf_internal (void *s,
+   int __ms_v*scanf_internal (void *s,
                             void *format,
                             void *arg,
+                            size_t count,
                             void *func);
 
    I say 'essentially' because passing a function pointer as void in ISO
@@ -37,19 +34,6 @@
         */
     .def __argtos;    .scl    2;    .type    32;    .endef
 
-    /* The max number of pointers we support.  Must be an even number
-       to keep the 64bit stack 16byte aligned.  Must not be less than 4.  */
-    .equ entries, 30
-
-    /* 64bit pointers are 8 bytes.  */
-    .equ sizeof, 8
-
-    /* Size of our buffer.  */
-    .equ iBytes, entries * sizeof
-
-    /* Stack space for first 2 args to s*scanf.  */
-    .equ iOffset, (2 * sizeof)
-
     .seh_proc __argtos
 __argtos:
 
@@ -58,48 +42,58 @@ __argtos:
       - format must be in rdx.  That's where it is on entry.
       - The first pointer in arg must be in r8. arg is in r8 on entry.
       - The second pointer in arg must be in r9. arg is in r8 on entry.
-      - The ($entries - 2) other pointers in arg must be on the stack,
+      - The (count - 2) other pointers in arg must be on the stack,
        starting 32bytes into rsp.  */
 
-    /* We need enough room to shadow (s + format)
-       + (enough room for all the other args).  */
-    subq $(iOffset + iBytes), %rsp
-    .seh_stackalloc iOffset + iBytes
+    pushq %rbp
+    .seh_pushreg %rbp
+    movq %rsp, %rbp
+    .seh_setframe %rbp, 0
 
+    /* We need to always reserve space to shadow 4 parameters.  */
+    subq $32, %rsp
+    .seh_stackalloc 32
     .seh_endprologue
 
-    /* We are going to copy $entries pointers from arg to our
-       local stack.  Except the first 2, since they will be
-       loaded in registers.  */
-    movq $entries - 2, %r10 /* # of ptrs to copy.  */
-
-    /* The first 32 bytes are in registers, but by spec, space
-          must still be reserved for them on the stack.  Put the
+    movq 48(%rbp), %r10 /* func. */
+
+    /* We need enough room to shadow all the other args.
+       Except the first 2, since they will be loaded in registers.  */
+    cmpq $2, %r9 /* count. */
+    jbe .SKIP
+    subq $2, %r9 /* # of ptrs to copy.  */
+    /* Calculate stack size (arg is 8byte) and keep the stack 16byte aligned. 
*/
+    leaq 8(, %r9, 8), %rax /* %rax = (%r9 + 1) * 8 */
+    andq $-16, %rax
+    subq %rax, %rsp
+
+    /* We are going to copy parameters from arg to our local stack.
+       The first 32 bytes are in registers, but by spec, space
+       must still be reserved for them on the stack.  Put the
        rest of the pointers in the stack after that.  */
     lea 32(%rsp), %r11 /* dst.  */
 
 .LOOP:
-    subq $1, %r10
+    subq $1, %r9
 
     /* Use 16 to skip over the first 2 pointers.  */
-    movq 16(%r8, %r10, 8), %rax
-    movq %rax, (%r11, %r10, 8)
+    movq 16(%r8, %r9, 8), %rax
+    movq %rax, (%r11, %r9, 8)
     jnz .LOOP
 
-    /* r9 contains the routine we are going to call.  Since we are about to
-       overwrite it, move it somewhere safe.  */
-    movq %r9, %r10
-
+.SKIP:
     /* The stack is now correctly populated, and so are rcx and rdx.
        But we need to load the last 2 regs before making the call.  */
     movq 0x8(%r8), %r9 /* 2nd dest location (may be garbage if only 1 arg).  */
-    movq (%r8), %r8 /* 1st dest location.  */
+    movq (%r8), %r8 /* 1st dest location (may be garbage if no arg).  */
 
     /* Make the call.  */
     callq *%r10
 
-    addq $(iOffset + iBytes), %rsp
+    /* Restore stack.  */
+    movq %rbp, %rsp
 
+    popq %rbp
     retq
     .seh_endproc
 
@@ -113,31 +107,23 @@ __argtos:
         */
     .def __argtos;    .scl    2;    .type    32;    .endef
 
-    /* The max number of pointers we support.  Must not be less than 1.  */
-    .equ entries, 30
-
-    /* 64bit pointers are 8 bytes.  */
-    .equ sizeof, 4
-
-    /* Size of our buffer.  */
-    .set iBytes, entries * sizeof
-
-    /* Stack space for first 2 args to s*scanf.  */
-    .equ iOffset, (2 * sizeof)
-
 __argtos:
     pushl %ebp
     movl %esp, %ebp
     pushl %edi
+    pushl %ebx
 
     /* Reserve enough stack space for everything.
 
        Stack usage will look like:
        4 bytes - s
        4 bytes - format
-       (iBytes) bytes - variable # of parameters for sscanf (all ptrs).  */
+       4*count bytes - variable # of parameters for sscanf (all ptrs).  */
 
-    subl $(iOffset + iBytes), %esp
+    movl 20(%ebp), %ebx  /* count.  */
+    addl $2, %ebx  /* s + format.  */
+    sall $2, %ebx  /* (count + 2) * 4.  */
+    subl %ebx, %esp
 
     /* Write out s and format where they need to be for the sscanf call.  */
     movl 8(%ebp), %eax
@@ -145,10 +131,12 @@ __argtos:
     movl 12(%ebp), %edx
     movl %edx, 0x4(%esp)  /* format.  */
 
-    /* We are going to copy $entries pointers from arg to our
+    /* We are going to copy _count_ pointers from arg to our
        local stack.  */
-    movl $entries, %ecx /* # of ptrs to copy.  */
-    lea iOffset(%esp), %edi /* dst.  */
+    movl 20(%ebp), %ecx /* # of ptrs to copy.  */
+    testl %ecx, %ecx
+    jz .SKIP
+    lea 8(%esp), %edi /* dst.  */
     movl 16(%ebp), %edx /* src.  */
 
 .LOOP:
@@ -158,13 +146,16 @@ __argtos:
     movl %eax, (%edi, %ecx, 4)
     jnz .LOOP
 
+.SKIP:
     /* The stack is now correctly populated.  */
 
     /* Make the call.  */
-    call *20(%ebp)
+    call *24(%ebp)
 
     /* Restore stack.  */
-    addl $(iOffset + iBytes), %esp
+    addl %ebx, %esp
+
+    popl %ebx
     popl %edi
     leave
 
@@ -178,25 +169,37 @@ __argtos:
     .globl __argtos
 
 __argtos:
-    push    {r4-r7, lr}
-    sub     sp, sp, #128
-    mov     r12, r3
-    mov     r4, sp
+    push    {r4-r8, lr}
+    ldr     r12, [sp, #24]
 
+    cmp     r3, #0
+    beq     2f
+    subs    r3, r3, #1
     ldr     r5, [r2], #4
+    beq     2f
+    subs    r3, r3, #1
     ldr     r6, [r2], #4
-
-    mov     r3, #116
+    beq     2f
+
+    /* Round the number of entries to an even number, to maintain
+     * 8 byte stack alignment. */
+    mov     r8, r3
+    add     r8, r8, #1
+    bic     r8, r8, #1
+    sub     sp, sp, r8, lsl #2
+    mov     r4, sp
 1:  ldr     r7, [r2], #4
+    subs    r3, r3, #1
     str     r7, [r4], #4
-    subs    r3, r3, #4
     bne     1b
 
+2:
     mov     r2, r5
     mov     r3, r6
     blx     r12
-    add     sp, sp, #128
-    pop     {r4-r7, pc}
+
+    add     sp, sp, r8, lsl #2
+    pop     {r4-r8, pc}
 
 #elif defined (__aarch64__)
 
@@ -207,25 +210,51 @@ __argtos:
 __argtos:
     stp     x29, x30, [sp, #-16]!
     mov     x29, sp
-    sub     sp, sp, #256
-    mov     x9, sp
     mov     x10, x2
     mov     x11, x3
+    mov     x12, x4
+
+    cmp     x11, #0
+    b.eq    2f
 
+    subs    x11, x11, #1
     ldr     x2, [x10], #8
+    b.eq    2f
+
+    subs    x11, x11, #1
     ldr     x3, [x10], #8
+    b.eq    2f
+
+    subs    x11, x11, #1
     ldr     x4, [x10], #8
+    b.eq    2f
+
+    subs    x11, x11, #1
     ldr     x5, [x10], #8
+    b.eq    2f
+
+    subs    x11, x11, #1
     ldr     x6, [x10], #8
-    ldr     x7, [x10], #8
+    b.eq    2f
 
-    mov     x12, #240
+    subs    x11, x11, #1
+    ldr     x7, [x10], #8
+    b.eq    2f
+
+    /* Round the number of entries to an even number, to maintain
+     * 16 byte stack alignment. */
+    mov     x13, x11
+    add     x13, x13, #1
+    bic     x13, x13, #1
+    sub     sp, sp, x13, lsl #3
+    mov     x9, sp
 1:  ldr     x13, [x10], #8
+    subs    x11, x11, #1
     str     x13, [x9], #8
-    subs    x12, x12, #8
     b.ne    1b
 
-    blr     x11
+2:
+    blr     x12
     mov     sp, x29
     ldp     x29, x30, [sp], #16
     ret
diff --git a/mingw-w64-crt/stdio/scanf2-argcount-char.c 
b/mingw-w64-crt/stdio/scanf2-argcount-char.c
new file mode 100644
index 000000000000..bc18dc5685f5
--- /dev/null
+++ b/mingw-w64-crt/stdio/scanf2-argcount-char.c
@@ -0,0 +1,9 @@
+/**
+ * This file has no copyright assigned and is placed in the Public Domain.
+ * This file is part of the mingw-w64 runtime package.
+ * No warranty is given; refer to the file DISCLAIMER.PD within this package.
+ */
+
+#define FUNC __ms_scanf_max_arg_count_internal
+#define TYPE char
+#include "scanf2-argcount-template.c"
diff --git a/mingw-w64-crt/stdio/scanf2-argcount-template.c 
b/mingw-w64-crt/stdio/scanf2-argcount-template.c
new file mode 100644
index 000000000000..fec8a093971b
--- /dev/null
+++ b/mingw-w64-crt/stdio/scanf2-argcount-template.c
@@ -0,0 +1,18 @@
+/**
+ * This file has no copyright assigned and is placed in the Public Domain.
+ * This file is part of the mingw-w64 runtime package.
+ * No warranty is given; refer to the file DISCLAIMER.PD within this package.
+ */
+
+#include <stddef.h>
+
+size_t FUNC(const TYPE *format);
+size_t FUNC(const TYPE *format)
+{
+  size_t count = 0;
+  for (; *format; format++) {
+    if (*format == (TYPE)'%')
+      count++;
+  }
+  return count;
+}
diff --git a/mingw-w64-crt/stdio/scanf2-argcount-wchar.c 
b/mingw-w64-crt/stdio/scanf2-argcount-wchar.c
new file mode 100644
index 000000000000..cece837a3ebb
--- /dev/null
+++ b/mingw-w64-crt/stdio/scanf2-argcount-wchar.c
@@ -0,0 +1,9 @@
+/**
+ * This file has no copyright assigned and is placed in the Public Domain.
+ * This file is part of the mingw-w64 runtime package.
+ * No warranty is given; refer to the file DISCLAIMER.PD within this package.
+ */
+
+#define FUNC __ms_wscanf_max_arg_count_internal
+#define TYPE wchar_t
+#include "scanf2-argcount-template.c"
diff --git a/mingw-w64-crt/stdio/vfscanf.c b/mingw-w64-crt/stdio/vfscanf.c
index dab72fe4a640..c3282a3edbf7 100644
--- a/mingw-w64-crt/stdio/vfscanf.c
+++ b/mingw-w64-crt/stdio/vfscanf.c
@@ -11,18 +11,22 @@ extern int __ms_vfscanf_internal (
   FILE * s,
   const char * format,
   va_list arg,
+  size_t count,
   int (*func)(FILE * __restrict__,  const char * __restrict__, ...))
   asm("__argtos");
 
+extern size_t __ms_scanf_max_arg_count_internal (const char * format);
+
 int __ms_vfscanf (FILE * __restrict__ stream, const char * __restrict__ 
format, va_list arg)
 {
+  size_t count = __ms_scanf_max_arg_count_internal (format);
   int ret;
 
 #if defined(_AMD64_) || defined(__x86_64__) || \
   defined(_X86_) || defined(__i386__) || \
   defined(_ARM_) || defined(__arm__) || \
   defined(_ARM64_) || defined(__aarch64__)
-  ret = __ms_vfscanf_internal (stream, format, arg, fscanf);
+  ret = __ms_vfscanf_internal (stream, format, arg, count, fscanf);
 #else
 #error "unknown platform"
 #endif
diff --git a/mingw-w64-crt/stdio/vfwscanf.c b/mingw-w64-crt/stdio/vfwscanf.c
index 52cf9283547b..f8e465d3616c 100644
--- a/mingw-w64-crt/stdio/vfwscanf.c
+++ b/mingw-w64-crt/stdio/vfwscanf.c
@@ -11,19 +11,23 @@ extern int __ms_vfwscanf_internal (
   FILE * s,
   const wchar_t * format,
   va_list arg,
+  size_t count,
   int (*func)(FILE * __restrict__,  const wchar_t * __restrict__, ...))
   asm("__argtos");
 
+extern size_t __ms_wscanf_max_arg_count_internal (const wchar_t * format);
+
 int __ms_vfwscanf (FILE * __restrict__ stream,
   const wchar_t * __restrict__ format, va_list arg)
 {
+  size_t count = __ms_wscanf_max_arg_count_internal (format);
   int ret;
 
 #if defined(_AMD64_) || defined(__x86_64__) || \
   defined(_X86_) || defined(__i386__) || \
   defined(_ARM_) || defined(__arm__) || \
   defined (_ARM64_) || defined (__aarch64__)
-  ret = __ms_vfwscanf_internal (stream, format, arg, fwscanf);
+  ret = __ms_vfwscanf_internal (stream, format, arg, count, fwscanf);
 #else
 #error "unknown platform"
 #endif
diff --git a/mingw-w64-crt/stdio/vsscanf.c b/mingw-w64-crt/stdio/vsscanf.c
index 6c8fe5a56f40..9b3b650ded51 100644
--- a/mingw-w64-crt/stdio/vsscanf.c
+++ b/mingw-w64-crt/stdio/vsscanf.c
@@ -11,19 +11,23 @@ extern int __ms_vsscanf_internal (
   const char * s,
   const char * format,
   va_list arg,
+  size_t count,
   int (*func)(const char * __restrict__,  const char * __restrict__, ...))
   asm("__argtos");
 
+extern size_t __ms_scanf_max_arg_count_internal (const char * format);
+
 int __ms_vsscanf (const char * __restrict__ s,
   const char * __restrict__ format, va_list arg)
 {
+  size_t count = __ms_scanf_max_arg_count_internal (format);
   int ret;
 
 #if defined(_AMD64_) || defined(__x86_64__) || \
   defined(_X86_) || defined(__i386__) || \
   defined(_ARM_) || defined(__arm__) || \
   defined(_ARM64_) || defined(__aarch64__)
-  ret = __ms_vsscanf_internal (s, format, arg, sscanf);
+  ret = __ms_vsscanf_internal (s, format, arg, count, sscanf);
 #else
 #error "unknown platform"
 #endif
diff --git a/mingw-w64-crt/stdio/vswscanf.c b/mingw-w64-crt/stdio/vswscanf.c
index 941ed1205772..01c811b32ee3 100644
--- a/mingw-w64-crt/stdio/vswscanf.c
+++ b/mingw-w64-crt/stdio/vswscanf.c
@@ -11,19 +11,23 @@ extern int __ms_vswscanf_internal (
   const wchar_t * s,
   const wchar_t * format,
   va_list arg,
+  size_t count,
   int (*func)(const wchar_t * __restrict__,  const wchar_t * __restrict__, 
...))
   asm("__argtos");
 
+extern size_t __ms_wscanf_max_arg_count_internal (const wchar_t * format);
+
 int __ms_vswscanf(const wchar_t * __restrict__ s, const wchar_t * __restrict__ 
format,
   va_list arg)
 {
+  size_t count = __ms_wscanf_max_arg_count_internal (format);
   int ret;
 
 #if defined(_AMD64_) || defined(__x86_64__) || \
   defined(_X86_) || defined(__i386__) || \
   defined(_ARM_) || defined(__arm__) || \
   defined(_ARM64_) || defined(__aarch64__)
-  ret = __ms_vswscanf_internal (s, format, arg, swscanf);
+  ret = __ms_vswscanf_internal (s, format, arg, count, swscanf);
 #else
 #error "unknown platform"
 #endif
-- 
2.20.1



_______________________________________________
Mingw-w64-public mailing list
Mingw-w64-public@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/mingw-w64-public

Reply via email to