https://gcc.gnu.org/bugzilla/show_bug.cgi?id=54412

John Platts <john_platts at hotmail dot com> changed:

           What    |Removed                     |Added
----------------------------------------------------------------------------
                 CC|                            |john_platts at hotmail dot com

--- Comment #28 from John Platts <john_platts at hotmail dot com> ---
The correct way to align the stack to a 32-byte or 64-byte boundary on 64-bit
Windows is to use a frame pointer in a function that requires stack realignment
and then realign the stack to the required alignment once the frame pointer is
set and all of the non-volatile registers used in the function are saved.

/* Abstract interface for an object that produces 256-bit integer vectors. */
/* Example_AVX2_Func (assembly listing below) calls NextVector() once per */
/* loop iteration through the object's virtual table. */
class Avx2VectorGenerator {
public:
    /* Produces the next vector. The assembly listing below loads the */
    /* pointer stored at offset 0 of the object and calls through the first */
    /* entry it points to (`call *(%rax)`), so this must stay the first */
    /* virtual function declared in the class. */
    virtual __m256i NextVector() = 0;
};

/* Implemented by the assembly listing below. It is meant to call */
/* generator->NextVector() `iterations` times and return the byte-wise sum */
/* of the results; the result is zeroed up front, so 0 iterations yields an */
/* all-zero vector without touching `generator`. */
__m256i Example_AVX2_Func(Avx2VectorGenerator* generator, size_t iterations);

/*
 * __m256i Example_AVX2_Func(Avx2VectorGenerator* generator, size_t iterations)
 *
 * Syntax: GAS, AT&T operand order (source, destination).
 * ABI:    Microsoft x64.
 * In:     rcx = generator, rdx = iterations
 * Out:    ymm0 = byte-wise (vpaddb) sum of the vectors produced by
 *         `iterations` calls to generator->NextVector(); all zeros when
 *         iterations is 0.
 * Saved:  rbp (frame pointer), rbx (holds generator), rdi (holds the
 *         remaining iteration count) are pushed in the prologue and
 *         restored in the epilogue.
 * Stack:  after the re-alignment rsp is 32-byte aligned and at least 64
 *         bytes below the frame pointer:
 *           0..31(%rsp)   shadow space for the callee
 *           32..63(%rsp)  running sum, spilled across the call
 */
    .seh_proc Example_AVX2_Func
Example_AVX2_Func:
    pushq %rbp
    .seh_pushreg %rbp
    pushq %rbx
    .seh_pushreg %rbx
    pushq %rdi
    .seh_pushreg %rdi
    movq %rsp, %rbp
    .seh_setframe %rbp, 0
    .seh_endprologue

    /* Set rbx to generator and rdi to iterations; both must survive */
    /* the calls made inside the loop */
    movq %rcx, %rbx
    movq %rdx, %rdi

    /* It is okay to allocate additional stack memory */
    /* and re-align the stack pointer outside of the */
    /* SEH prologue as there is a frame pointer in this */
    /* function */
    subq $64, %rsp
    andq $-32, %rsp

    /* Zero out the result vector */
    vpxor %ymm0, %ymm0, %ymm0

    /* Skip the loop entirely when iterations is zero */
    testq %rdi, %rdi
    jz .Lloop_complete
.Lloop_iteration_start:
    /* Save the result vector to 32(%rsp); ymm0 is overwritten by the */
    /* value returned from the call below. The slot is 32-byte aligned */
    /* because rsp is, so the aligned store is safe. */
    vmovdqa %ymm0, 32(%rsp)

    /* Move generator into rcx (the `this` argument) */
    movq %rbx, %rcx
    /* Load the virtual table pointer stored at the start of the object */
    /* into rax */
    movq (%rbx), %rax
    /* Call generator->NextVector() through the first virtual table entry */
    call *(%rax)

    /* Add the saved result vector to the value NextVector() returned */
    /* in ymm0 */
    vpaddb 32(%rsp), %ymm0, %ymm0

    /* Decrement iterations by 1 */
    subq $1, %rdi

    /* Jump back to the beginning of the loop if iterations is non-zero */
    jnz .Lloop_iteration_start
.Lloop_complete:
    /* Discard the re-aligned area by restoring rsp from the frame */
    /* pointer, then restore the saved registers in reverse push order */
    leaq (%rbp), %rsp
    popq %rdi
    popq %rbx
    popq %rbp
    ret
    .seh_endproc

Reply via email to