There is a bug when the refc or MarkAndSweep GC is combined with the MSVC compiler.

It has been reported as <https://github.com/nim-lang/Nim/issues/22648>.

Here is the code:
    
    
    import strformat, strutils
    
    # Width that every code string is aligned to in get_all_codes.
    let SIZE = 10_000_000
    # Number of code strings generated (get_all_codes iterates 1..COUNT).
    let COUNT = 99
    
    # Binding to C's printf from <stdio.h>: imported under its C name, declared
    # variadic (varargs) and marked noSideEffect.
    proc printf*(formatstr: cstring) {.header: "<stdio.h>", importc: "printf", 
noSideEffect, varargs.}
    
    proc get_all_codes(): seq[string] =
       ## Builds COUNT strings: the decimal text of each i in 1..COUNT, passed
       ## through `align` with width SIZE and padding character '0'.
       for i in 1..COUNT:
          result.add align($i, SIZE, '0')
    
    proc get_names(codes: seq[string]): seq[string] =
       ## Returns one string per entry of `codes`: "abc" followed by that entry,
       ## so each result is 3 characters longer than its source string.
       for code in codes:
          result.add "abc" & code
    
    proc check_codes() {.exportc.} =
       ## Reproducer body: builds `codes` and `names`, then walks both in
       ## lockstep, printing the lengths of each pair and quitting as soon as
       ## a length differs from what get_all_codes/get_names produced.
       ## Every iteration also appends a formatted "code:name" string to
       ## `lines`; the final line printed is the number of entries collected.
       var lines: seq[string]

       var codes = get_all_codes()
       var names = get_names(codes)

       # cell_to_check = cast[int](names) - 16
       # (A variable I added to the GC to record this pointer, so that the GC
       # prints a message when the cell is freed.)
       # NOTE(review): 16 is presumably the size of the GC cell header that
       # precedes the seq payload -- confirm against the GC sources.
       printf "--------- start names pointer is %p\n", cast[int](names) - 16

       let L = codes.len
       for i in 0..<L:
          var name = names[i]
          var code = codes[i]

          # Continuation line is indented deeper than the call so the argument
          # list stays valid under Nim's indentation rules.
          printf "Iterations %d code.len = %d name.len = %d\n", i+1, code.len,
             name.len

          # This should never happen: both lengths are fixed by construction
          # (SIZE for a code, SIZE + 3 for the "abc"-prefixed name).
          if code.len != SIZE or name.len != SIZE + 3:
             printf "oops %d %d %d", i+1, code.len, name.len
             quit()

          # NOTE(review): this large allocation is presumably what triggers
          # the collection seen in the output -- confirm.
          lines.add fmt"{code}:{name}"

       printf "ok %d\n", lines.len
    
    when isMainModule:
       check_codes()
    
    
    Run

This is the program's output when compiled with MSVC and -d:release.
    
    
    --------- start names pointer is 000001A5D6390460
    Iterations 1 code.len = 10000000 name.len = 10000003
    Iterations 2 code.len = 10000000 name.len = 10000003
    Iterations 3 code.len = 10000000 name.len = 10000003
    Iterations 4 code.len = 10000000 name.len = 10000003
    --------- dealloc 000001A5D6390460
    Iterations 5 code.len = 10000000 name.len = 10000003
    Iterations 6 code.len = 10000000 name.len = 10000000
    oops 6 10000000 10000000
    
    
    Run

The memory of the variable `names` is deallocated after iteration 4, while the loop is still using it.

The reason for the bug is simple, but I don't know how to fix it.

The refc and MarkAndSweep GCs scan the stack and the registers to check whether 
any stack slot or register still refers to an allocated local variable. 
Normally this method is very simple and causes no problems. But when MSVC 
optimizes the code above, no stack slot or register refers to the variable 
`names` any more; there is only a reference to the address `names->data`. So 
during the MarkAndSweep stage, the memory of the variable `names` is deallocated.

Here is the assembly code:
    
    
    check_codes LABEL NEAR
    ; Note: Prefix bit or byte has no meaning in this context
            push    rbx                                     ; 0000 _ 40: 53
            push    rbp                                     ; 0002 _ 55
            push    rdi                                     ; 0003 _ 57
            push    r12                                     ; 0004 _ 41: 54
            push    r13                                     ; 0006 _ 41: 55
            sub     rsp, 48                                 ; 0008 _ 48: 83. 
EC, 30
            xor     ebx, ebx                                ; 000C _ 33. DB
            mov     qword ptr [rsp+68H], rbx                ; 000E _ 48: 89. 5C 
24, 68
            call    get_all_codes__bug5050545256_u8         ; 0013 _ E8, 
00000000(rel)
            mov     rcx, rax                                ; 0018 _ 48: 8B. C8
            mov     rdi, rax                                ; 001B _ 48: 8B. F8
            call    get_names__bug5050545256_u19            ; 001E _ E8, 
00000000(rel)
            lea     rcx, ptr 
[??_C@_0CF@CADBCMPJ@?9?9?9?9?9?9?9?9?9?5start?5names?5pointer?5is@]; 0023 _ 48: 
8D. 0D, 00000000(rel)
            mov     r13, rax                                ; 002A _ 4C: 8B. E8 
                                        # r13 save the names
            lea     rdx, ptr [rax-10H]                      ; 002D _ 48: 8D. 
50, F0
            mov     qword ptr [cell_to_check__system_u5847], rdx; 0031 _ 48: 
89. 15, 00000000(rel)
            call    qword ptr [__imp_printf]                ; 0038 _ FF. 15, 
00000000(rel)
            test    rdi, rdi                                ; 003E _ 48: 85. FF
            jz      ?_006                                   ; 0041 _ 74, 05
            mov     r12, qword ptr [rdi]                    ; 0043 _ 4C: 8B. 27
            jmp     ?_007                                   ; 0046 _ EB, 03
    
    ?_006:  mov     r12, rbx                                ; 0048 _ 4C: 8B. E3
    ?_007:  mov     qword ptr [rsp+70H], rsi                ; 004B _ 48: 89. 74 
24, 70
            mov     rbp, rbx                                ; 0050 _ 48: 8B. EB
            mov     qword ptr [rsp+28H], r14                ; 0053 _ 4C: 89. 74 
24, 28
            mov     qword ptr [rsp+20H], r15                ; 0058 _ 4C: 89. 7C 
24, 20
            test    r12, r12                                ; 005D _ 4D: 85. E4
            jle     ?_019                                   ; 0060 _ 0F 8E, 
0000015C
            lea     r14, ptr [rdi+10H]                      ; 0066 _ 4C: 8D. 
77, 10
            sub     r13, rdi                                ; 006A _ 4C: 2B. EF 
                                        # r13 is no longer point to names
    ; Filling space: 3H
    ; Filler type: Multi-byte NOP
    ;       db 0FH, 1FH, 00H
    
    ALIGN   8
    ?_008:  mov     rcx, qword ptr [r14+r13]                ; 0070 _ 4B: 8B. 0C 
2E
    
    
    Run

Reply via email to