On 5/7/18, Oliver Pinter <oliver.pin...@hardenedbsd.org> wrote:
> On 5/7/18, Mateusz Guzik <m...@freebsd.org> wrote:
>> Author: mjg
>> Date: Mon May  7 15:07:28 2018
>> New Revision: 333324
>> URL: https://svnweb.freebsd.org/changeset/base/333324
>>
>> Log:
>>   amd64: replace libkern's memset and memmove with assembly variants
>>
>>   memmove is repurposed bcopy (arguments swapped, return value added)
>>   The libkern variant is a wrapper around bcopy, so this is a big
>>   improvement.
>>
>>   memset is repurposed memcpy. The libkern variant is doing fishy stuff,
>>   including branching on 0 and calling bzero.
>>
>>   Both functions are rather crude and subject to partial depessimization.
>>
>>   This is a soft prerequisite to adding variants utilizing the
>>   'Enhanced REP MOVSB/STOSB' bit and let the kernel patch at runtime.
>>
>> Modified:
>>   head/sys/amd64/amd64/support.S
>>   head/sys/conf/files.amd64
>>
>> Modified: head/sys/amd64/amd64/support.S
>> ==============================================================================
>> --- head/sys/amd64/amd64/support.S   Mon May  7 15:07:26 2018        (r333323)
>> +++ head/sys/amd64/amd64/support.S   Mon May  7 15:07:28 2018        (r333324)
>> @@ -162,6 +162,58 @@ ENTRY(bcopy)
>>  END(bcopy)
>>
>>  /*
>> + * memmove(dst, src, cnt)
>> + *         rdi, rsi, rdx
>> + * Original by:
>> + *  w...@tools.de     (Wolfgang Solfrank, TooLs GmbH) +49-228-985800
>> + */
>> +ENTRY(memmove)
>> +    PUSH_FRAME_POINTER
>> +    movq    %rdi,%r9
>> +    movq    %rdx,%rcx
>> +
>> +    movq    %rdi,%rax
>> +    subq    %rsi,%rax
>> +    cmpq    %rcx,%rax                       /* overlapping && src < dst? */
>> +    jb      1f
>> +
>> +    shrq    $3,%rcx                         /* copy by 64-bit words */
>> +    rep
>> +    movsq
>> +    movq    %rdx,%rcx
>> +    andq    $7,%rcx                         /* any bytes left? */
>> +    rep
>> +    movsb
>> +    movq    %r9,%rax
>> +    POP_FRAME_POINTER
>> +    ret
>> +
>> +    /* ALIGN_TEXT */
>> +1:
>> +    addq    %rcx,%rdi                       /* copy backwards */
>> +    addq    %rcx,%rsi
>> +    decq    %rdi
>> +    decq    %rsi
>> +    andq    $7,%rcx                         /* any fractional bytes? */
>> +    std
>> +    rep
>> +    movsb
>> +    movq    %rdx,%rcx                       /* copy remainder by 64-bit words */
>> +    shrq    $3,%rcx
>> +    subq    $7,%rsi
>> +    subq    $7,%rdi
>> +    rep
>> +    movsq
>> +    cld
>> +    movq    %r9,%rax
>> +    POP_FRAME_POINTER
>> +    ret
>> +END(memmove)
>> +
>> +/*
>> + * memcpy(dst, src, len)
>> + *        rdi, rsi, rdx
>> + *
>>   * Note: memcpy does not support overlapping copies
>>   */
>>  ENTRY(memcpy)
>> @@ -178,6 +230,27 @@ ENTRY(memcpy)
>>      POP_FRAME_POINTER
>>      ret
>>  END(memcpy)
>> +
>> +/*
>> + * memset(dst, c,   len)
>> + *        rdi, rsi, rdx
>> + */
>> +ENTRY(memset)
>> +    PUSH_FRAME_POINTER
>> +    movq    %rdi,%r9
>> +    movq    %rdx,%rcx
>> +    movq    %rsi,%rax
>> +    shrq    $3,%rcx
>> +    rep
>> +    stosq
>
> According to the Intel SDM, stosq stores the whole of RAX into the
> destination and then increments the destination register by 8. This
> implementation is wrong: c is a char, so RAX looks like
> 00000000000000CC, and the pattern stored in the destination buffer
> would be that 8-byte value repeated SIZE / 8 times, followed by
> SIZE % 8 bytes of CC.

Attached the proof.

>
>> +    movq    %rdx,%rcx
>> +    andq    $7,%rcx
>> +    rep
>> +    stosb
>> +    movq    %r9,%rax
>> +    POP_FRAME_POINTER
>> +    ret
>> +END(memset)
>>
>>  /*
>>   * pagecopy(%rdi=from, %rsi=to)
>>
>> Modified: head/sys/conf/files.amd64
>> ==============================================================================
>> --- head/sys/conf/files.amd64        Mon May  7 15:07:26 2018        (r333323)
>> +++ head/sys/conf/files.amd64        Mon May  7 15:07:28 2018        (r333324)
>> @@ -620,8 +620,6 @@ isa/vga_isa.c                    optional        vga
>>  kern/kern_clocksource.c             standard
>>  kern/link_elf_obj.c         standard
>>  libkern/x86/crc32_sse42.c   standard
>> -libkern/memmove.c           standard
>> -libkern/memset.c            standard
>>  #
>>  # IA32 binary support
>>  #
>> _______________________________________________
>> svn-src-head@freebsd.org mailing list
>> https://lists.freebsd.org/mailman/listinfo/svn-src-head
>> To unsubscribe, send any mail to "svn-src-head-unsubscr...@freebsd.org"
>>
>
_______________________________________________
svn-src-head@freebsd.org mailing list
https://lists.freebsd.org/mailman/listinfo/svn-src-head
To unsubscribe, send any mail to "svn-src-head-unsubscr...@freebsd.org"

Reply via email to