Author: luporl
Date: Wed Jan 15 20:25:52 2020
New Revision: 356767
URL: https://svnweb.freebsd.org/changeset/base/356767

Log:
  [PPC64] memcpy/memmove/bcopy optimization
  
  For copies shorter than 512 bytes, the data is copied using plain
  ld/std instructions.
  For 512 bytes or more, the copy is done in 3 phases:
  
  Phase 1: copy from the src buffer until it's aligned at a 16-byte boundary
  Phase 2: copy as many aligned 64-byte blocks from the src buffer as possible
  Phase 3: copy the remaining data, if any
  
  In phase 2, this code uses VSX instructions when available. Otherwise,
  it uses ldx/stdx.
  
  Submitted by: Luis Pires <lffpires_ruabrasil.org> (original version)
  Reviewed by:  jhibbits
  Differential Revision:        https://reviews.freebsd.org/D15118

Added:
  head/lib/libc/powerpc64/string/bcopy.S   (contents, props changed)
  head/lib/libc/powerpc64/string/bcopy_resolver.c   (contents, props changed)
  head/lib/libc/powerpc64/string/bcopy_vsx.S   (contents, props changed)
  head/lib/libc/powerpc64/string/memcpy.S   (contents, props changed)
  head/lib/libc/powerpc64/string/memcpy_resolver.c   (contents, props changed)
  head/lib/libc/powerpc64/string/memcpy_vsx.S   (contents, props changed)
  head/lib/libc/powerpc64/string/memmove.S   (contents, props changed)
  head/lib/libc/powerpc64/string/memmove_resolver.c   (contents, props changed)
  head/lib/libc/powerpc64/string/memmove_vsx.S   (contents, props changed)
Modified:
  head/lib/libc/powerpc64/string/Makefile.inc

Modified: head/lib/libc/powerpc64/string/Makefile.inc
==============================================================================
--- head/lib/libc/powerpc64/string/Makefile.inc Wed Jan 15 19:53:03 2020        
(r356766)
+++ head/lib/libc/powerpc64/string/Makefile.inc Wed Jan 15 20:25:52 2020        
(r356767)
@@ -1,6 +1,15 @@
 # $FreeBSD$
 
 MDSRCS+= \
+       bcopy.S \
+       bcopy_vsx.S \
+       bcopy_resolver.c \
+       memcpy.S \
+       memcpy_vsx.S \
+       memcpy_resolver.c \
+       memmove.S \
+       memmove_vsx.S \
+       memmove_resolver.c \
        strcpy_arch_2_05.S \
        strcpy.c \
        strcpy_resolver.c \

Added: head/lib/libc/powerpc64/string/bcopy.S
==============================================================================
--- /dev/null   00:00:00 1970   (empty, because file is newly added)
+++ head/lib/libc/powerpc64/string/bcopy.S      Wed Jan 15 20:25:52 2020        
(r356767)
@@ -0,0 +1,306 @@
+/*-
+ * Copyright (c) 2018 Instituto de Pesquisas Eldorado
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the author nor the names of its contributors may
+ *    be used to endorse or promote products derived from this software
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ */
+
+#include <machine/asm.h>
+__FBSDID("$FreeBSD$");
+
+/* Phase 2 of the multi-phase copy moves data in blocks of 64 bytes. */
+#define BLOCK_SIZE_BITS                        6
+#define BLOCK_SIZE                     (1 << BLOCK_SIZE_BITS)
+#define BLOCK_SIZE_MASK                        (BLOCK_SIZE - 1)
+
+/* Copies of at least this many bytes take the multi-phase path. */
+#define MULTI_PHASE_THRESHOLD          512
+
+#ifndef FN_NAME
+#ifdef MEMMOVE
+#define FN_NAME                                __memmove
+WEAK_REFERENCE(__memmove, memmove);
+#else
+#define FN_NAME                                __bcopy
+WEAK_REFERENCE(__bcopy, bcopy);
+#endif
+#endif
+
+/*
+ * Overlap-safe memory copy.
+ *
+ * Registers on entry:
+ *   MEMMOVE defined:  r3 = dst, r4 = src, r5 = len
+ *   otherwise (bcopy): r3 = src, r4 = dst, r5 = len; the two pointers are
+ *                     swapped right after the early-exit checks.
+ * From that point on the code always works with:
+ *   r3: dst
+ *   r4: src
+ *   r5: len
+ *
+ * With MEMMOVE defined, the original dst is reloaded into r3 before
+ * returning.
+ *
+ * The copy runs forward when src >= dst and backward (from the end of both
+ * buffers) when src < dst.
+ *
+ * len < MULTI_PHASE_THRESHOLD: bytes are copied until src is 16-byte
+ * aligned, then 16 bytes at a time with ld/std, then the trailing bytes.
+ *
+ * len >= MULTI_PHASE_THRESHOLD:
+ *   phase 1: byte copy up to a 16-byte aligned src position
+ *   phase 2: BLOCK_SIZE-byte blocks, using FN_PHASE2 if the including file
+ *            defines it, otherwise the ldx/stdx loop below
+ *   phase 3: the remaining bytes, through the single-phase code
+ *
+ * Scratch values are stored below the stack pointer without allocating a
+ * frame (this relies on the ABI keeping that area intact for a leaf
+ * routine):
+ *    -8(r1)            saved dst (MEMMOVE only)
+ *   -16(r1)            16-byte increment for phase 3 (16 or -16)
+ *   -24(r1)            16-byte pre/post adjustment for phase 3 (0 or -15)
+ *   -32(r1)            bytes to copy in phase 1
+ *   -40(r1)            blocks to copy in phase 2
+ *   -48(r1)            bytes to copy in phase 3
+ *   -56(r1)..-112(r1)  r14-r21, saved around the default phase 2 loop
+ *
+ * Registers written: r0, r3-r10, ctr and cr0; r14-r21 are used by the
+ * default phase 2 loop and restored afterwards.
+ */
+
+ENTRY(FN_NAME)
+       cmpld   %r3, %r4                /* src == dst? nothing to do */
+       beqlr-
+       cmpdi   %r5, 0                  /* len == 0? nothing to do */
+       beqlr-
+
+#ifdef MEMMOVE
+       std     %r3, -8(%r1)            /* save dst */
+#else  /* bcopy: swap src/dst */
+       mr      %r0, %r3
+       mr      %r3, %r4
+       mr      %r4, %r0
+#endif
+
+       cmpldi  %r5, MULTI_PHASE_THRESHOLD
+       bge     .Lmulti_phase
+
+       /* align src */
+       /* forward or backward copy? (addresses: compare as unsigned) */
+       cmpld   %r4, %r3
+       blt     .Lbackward_align
+
+       /* forward: copy single bytes until src is 16-byte aligned */
+       .align 5
+.Lalign:
+       andi.   %r0, %r4, 15
+       beq     .Lsingle_copy
+       lbz     %r0, 0(%r4)
+       addi    %r4, %r4, 1
+       stb     %r0, 0(%r3)
+       addi    %r3, %r3, 1
+       addi    %r5, %r5, -1
+       cmpdi   %r5, 0
+       beq-    .Ldone
+       b       .Lalign
+
+.Lbackward_align:
+       /* advance src and dst to end (past last byte) */
+       add     %r3, %r3, %r5
+       add     %r4, %r4, %r5
+       /* backward: copy single bytes until the src end is 16-byte aligned */
+       .align 5
+.Lbackward_align_loop:
+       andi.   %r0, %r4, 15
+       beq     .Lbackward_single_copy
+       lbzu    %r0, -1(%r4)
+       addi    %r5, %r5, -1
+       stbu    %r0, -1(%r3)
+       cmpdi   %r5, 0
+       beq-    .Ldone
+       b       .Lbackward_align_loop
+
+       /*
+        * Parameters consumed by .Lsingle_phase:
+        *   r0: byte increment (1 or -1)
+        *   r8: 16-byte increment (16 or -16)
+        *   r9: 16-byte pre/post adjustment (0 or -15)
+        *   r5: bytes left to copy
+        */
+.Lsingle_copy:
+       /* forward copy */
+       li      %r0, 1
+       li      %r8, 16
+       li      %r9, 0
+       b       .Lsingle_phase
+
+.Lbackward_single_copy:
+       /* backward copy */
+       li      %r0, -1
+       li      %r8, -16
+       li      %r9, -15
+       /* point src and dst to last byte */
+       addi    %r3, %r3, -1
+       addi    %r4, %r4, -1
+
+.Lsingle_phase:
+       srdi.   %r6, %r5, 4             /* number of 16-bytes */
+       beq     .Lsingle_1
+
+       /*
+        * pre-adjustment: for a backward copy this moves both pointers from
+        * the last byte of a 16-byte chunk to its first byte
+        */
+       add     %r3, %r3, %r9
+       add     %r4, %r4, %r9
+
+       mtctr   %r6
+       .align 5
+.Lsingle_16_loop:
+       /* both doublewords are loaded before either one is stored */
+       ld      %r6, 0(%r4)
+       ld      %r7, 8(%r4)
+       add     %r4, %r4, %r8
+       std     %r6, 0(%r3)
+       std     %r7, 8(%r3)
+       add     %r3, %r3, %r8
+       bdnz    .Lsingle_16_loop
+
+       /* post-adjustment: undo the pre-adjustment for the byte loop */
+       sub     %r3, %r3, %r9
+       sub     %r4, %r4, %r9
+
+.Lsingle_1:
+       andi.   %r6, %r5, 0x0f          /* number of 1-bytes */
+       beq     .Ldone                  /* 1-bytes == 0? done */
+
+       mtctr   %r6
+       .align 5
+.Lsingle_1_loop:
+       lbz     %r6, 0(%r4)
+       add     %r4, %r4, %r0           /* increment */
+       stb     %r6, 0(%r3)
+       add     %r3, %r3, %r0           /* increment */
+       bdnz    .Lsingle_1_loop
+
+.Ldone:
+#ifdef MEMMOVE
+       ld      %r3, -8(%r1)            /* restore dst */
+#endif
+       blr
+
+
+.Lmulti_phase:
+       /* set up multi-phase copy parameters */
+
+       /*
+        * r7 = bytes before the aligned section of the buffer
+        * (1..16: an already aligned src yields 16, not 0)
+        */
+       andi.   %r6, %r4, 15
+       subfic  %r7, %r6, 16
+       /* r8 = bytes in and after the aligned section of the buffer */
+       sub     %r8, %r5, %r7
+       /* r9 = bytes after the aligned section of the buffer */
+       andi.   %r9, %r8, BLOCK_SIZE_MASK
+       /* r10 = BLOCKS in the aligned section of the buffer */
+       srdi    %r10, %r8, BLOCK_SIZE_BITS
+
+       /* forward or backward copy? (addresses: compare as unsigned) */
+       cmpld   %r4, %r3
+       blt     .Lbackward_multi_copy
+
+       /* set up forward copy parameters */
+       std     %r7,  -32(%r1)          /* bytes to copy in phase 1 */
+       std     %r10, -40(%r1)          /* BLOCKS to copy in phase 2 */
+       std     %r9,  -48(%r1)          /* bytes to copy in phase 3 */
+
+       li      %r0, 1                  /* increment for phases 1 and 3 */
+       li      %r5, BLOCK_SIZE         /* increment for phase 2 */
+
+       /* op offsets for phase 2 */
+       li      %r7,  0
+       li      %r8,  16
+       li      %r9,  32
+       li      %r10, 48
+
+       std     %r8, -16(%r1)           /* 16-byte increment (16) */
+       std     %r7, -24(%r1)           /* 16-byte pre/post adjustment (0) */
+
+       b       .Lphase1
+
+.Lbackward_multi_copy:
+       /*
+        * set up backward copy parameters: the head and tail byte counts
+        * trade places because the copy starts from the end of the buffer
+        */
+       std     %r9,  -32(%r1)          /* bytes to copy in phase 1 */
+       std     %r10, -40(%r1)          /* BLOCKS to copy in phase 2 */
+       std     %r7,  -48(%r1)          /* bytes to copy in phase 3 */
+
+       li      %r0, -1                 /* increment for phases 1 and 3 */
+       add     %r6, %r5, %r0           /* r6 = len - 1 */
+       li      %r5, -BLOCK_SIZE        /* increment for phase 2 */
+       /* advance src and dst to the last position */
+       add     %r3, %r3, %r6
+       add     %r4, %r4, %r6
+
+       /* op offsets for phase 2 (relative to the last byte of a block) */
+       li      %r7,  -15
+       li      %r8,  -31
+       li      %r9,  -47
+       li      %r10, -63
+
+       add     %r6, %r7, %r0           /* r6 = -16 */
+       std     %r6, -16(%r1)           /* 16-byte increment (-16) */
+       std     %r7, -24(%r1)           /* 16-byte pre/post adjustment (-15) */
+
+.Lphase1:
+       ld      %r6, -32(%r1)           /* bytes to copy in phase 1 */
+       cmpldi  %r6, 0                  /* r6 == 0? skip phase 1 */
+       beq+    .Lphase2
+
+       mtctr   %r6
+       .align 5
+.Lphase1_loop:
+       lbz     %r6, 0(%r4)
+       add     %r4, %r4, %r0           /* phase 1 increment */
+       stb     %r6, 0(%r3)
+       add     %r3, %r3, %r0           /* phase 1 increment */
+       bdnz    .Lphase1_loop
+
+.Lphase2:
+       ld      %r6, -40(%r1)           /* BLOCKS to copy in phase 2 */
+       cmpldi  %r6, 0                  /* %r6 == 0? skip phase 2 */
+       beq     .Lphase3
+
+       /*
+        * Phase 2 contract (shared with FN_PHASE2):
+        *   r3: dst, r4: src
+        *   r5: block increment (BLOCK_SIZE or -BLOCK_SIZE)
+        *   r6: blocks to copy (non-zero)
+        *   r7/r8/r9/r10: offsets of the four 16-byte pieces of a block
+        */
+#ifdef FN_PHASE2
+FN_PHASE2
+#else
+       /* save registers */
+       std     %r14, -56(%r1)
+       std     %r15, -64(%r1)
+       std     %r16, -72(%r1)
+       std     %r17, -80(%r1)
+       std     %r18, -88(%r1)
+       std     %r19, -96(%r1)
+       std     %r20, -104(%r1)
+       std     %r21, -112(%r1)
+
+       /* r18-r21 = offsets of the second doubleword of each 16-byte piece */
+       addi    %r18, %r7, 8
+       addi    %r19, %r8, 8
+       addi    %r20, %r9, 8
+       addi    %r21, %r10, 8
+
+       mtctr   %r6
+       .align 5
+.Lphase2_loop:
+       /* first 32 bytes of the block: four loads, then four stores */
+       ldx     %r14, %r7,  %r4
+       ldx     %r15, %r18, %r4
+       ldx     %r16, %r8,  %r4
+       ldx     %r17, %r19, %r4
+       stdx    %r14, %r7,  %r3
+       stdx    %r15, %r18, %r3
+       stdx    %r16, %r8,  %r3
+       stdx    %r17, %r19, %r3
+
+       /* second 32 bytes of the block */
+       ldx     %r14, %r9,  %r4
+       ldx     %r15, %r20, %r4
+       ldx     %r16, %r10, %r4
+       ldx     %r17, %r21, %r4
+       stdx    %r14, %r9,  %r3
+       stdx    %r15, %r20, %r3
+       stdx    %r16, %r10, %r3
+       stdx    %r17, %r21, %r3
+
+       add     %r4, %r4, %r5           /* phase 2 increment */
+       add     %r3, %r3, %r5           /* phase 2 increment */
+
+       bdnz    .Lphase2_loop
+
+       /* restore registers */
+       ld      %r14, -56(%r1)
+       ld      %r15, -64(%r1)
+       ld      %r16, -72(%r1)
+       ld      %r17, -80(%r1)
+       ld      %r18, -88(%r1)
+       ld      %r19, -96(%r1)
+       ld      %r20, -104(%r1)
+       ld      %r21, -112(%r1)
+#endif
+
+.Lphase3:
+       /*
+        * load registers for transitioning into the single-phase logic
+        * (r0 still holds the byte increment set up for phase 1)
+        */
+       ld      %r5, -48(%r1)           /* bytes to copy in phase 3 */
+       ld      %r8, -16(%r1)           /* 16-byte increment */
+       ld      %r9, -24(%r1)           /* 16-byte pre/post adjustment */
+       b       .Lsingle_phase
+
+END(FN_NAME)
+
+       .section .note.GNU-stack,"",%progbits
+

Added: head/lib/libc/powerpc64/string/bcopy_resolver.c
==============================================================================
--- /dev/null   00:00:00 1970   (empty, because file is newly added)
+++ head/lib/libc/powerpc64/string/bcopy_resolver.c     Wed Jan 15 20:25:52 
2020        (r356767)
@@ -0,0 +1,68 @@
+/*-
+ * Copyright (c) 2018 Instituto de Pesquisas Eldorado
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the author nor the names of its contributors may
+ *    be used to endorse or promote products derived from this software
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <machine/cpu.h>
+#include <machine/ifunc.h>
+
+/*
+ * Token-pasting helpers.  The extra level of indirection makes sure macro
+ * arguments such as FN_NAME are expanded before they are pasted.
+ */
+#define _CAT(a,b)      a##b
+#define CAT(a,b)       _CAT(a,b)
+#define CAT3(a,b,c)    CAT(CAT(a,b),c)
+
+/*
+ * This file is shared by three resolvers.  The including file selects the
+ * function by defining MEMCOPY (memcpy) or MEMMOVE (memmove); with neither
+ * defined the resolver is built for bcopy, which takes src before dst and
+ * returns nothing.
+ */
+#ifdef MEMCOPY
+#define FN_NAME                memcpy
+#define FN_RET         void *
+#define FN_PARAMS      (void *dst, const void *src, size_t len)
+
+#elif defined(MEMMOVE)
+#define FN_NAME                memmove
+#define FN_RET         void *
+#define FN_PARAMS      (void *dst, const void *src, size_t len)
+
+#else
+#define FN_NAME                bcopy
+#define FN_RET         void
+#define FN_PARAMS      (const void *src, void *dst, size_t len)
+#endif
+
+/* Candidate implementations: __<name> and __<name>_vsx. */
+#define FN_NAME_NOVSX  CAT(__, FN_NAME)
+#define FN_NAME_VSX    CAT3(__, FN_NAME, _vsx)
+
+FN_RET FN_NAME_NOVSX FN_PARAMS;
+FN_RET FN_NAME_VSX FN_PARAMS;
+
+/*
+ * ifunc resolver for FN_NAME: hand back the VSX implementation when the
+ * CPU feature word has PPC_FEATURE_HAS_VSX set, the plain one otherwise.
+ */
+DEFINE_UIFUNC(, FN_RET, FN_NAME, FN_PARAMS)
+{
+       return ((cpu_features & PPC_FEATURE_HAS_VSX) != 0 ?
+           FN_NAME_VSX : FN_NAME_NOVSX);
+}

Added: head/lib/libc/powerpc64/string/bcopy_vsx.S
==============================================================================
--- /dev/null   00:00:00 1970   (empty, because file is newly added)
+++ head/lib/libc/powerpc64/string/bcopy_vsx.S  Wed Jan 15 20:25:52 2020        
(r356767)
@@ -0,0 +1,61 @@
+/*-
+ * Copyright (c) 2018 Instituto de Pesquisas Eldorado
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the author nor the names of its contributors may
+ *    be used to endorse or promote products derived from this software
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#ifndef FN_NAME
+#define FN_NAME                __bcopy_vsx
+#endif
+
+/*
+ * VSX version of the phase 2 (64-byte block) copy loop.  bcopy.S expands
+ * FN_PHASE2 in place of its default ldx/stdx loop when it is defined, and
+ * only reaches it with a non-zero block count.
+ *
+ * Inputs, as set up by bcopy.S:
+ * r3: dst
+ * r4: src
+ * r5: block increment
+ * r6: blocks to copy
+ * r7/r8/r9/r10: 16-byte offsets to copy
+ *
+ * Every iteration loads a whole block into vs6-vs9 before storing any part
+ * of it, then advances r4 and r3 by r5.  ctr is the loop counter; r3, r4,
+ * ctr and vs6-vs9 are modified.
+ */
+
+#define FN_PHASE2 \
+       mtctr   %r6                     ;\
+       .align 5                        ;\
+.Lphase2_loop:                         ;\
+       lxvd2x  %vs6, %r7,  %r4         ;\
+       lxvd2x  %vs7, %r8,  %r4         ;\
+       lxvd2x  %vs8, %r9,  %r4         ;\
+       lxvd2x  %vs9, %r10, %r4         ;\
+       stxvd2x %vs6, %r7,  %r3         ;\
+       stxvd2x %vs7, %r8,  %r3         ;\
+       stxvd2x %vs8, %r9,  %r3         ;\
+       stxvd2x %vs9, %r10, %r3         ;\
+       /* phase 2 increment */         ;\
+       add     %r4, %r4, %r5           ;\
+       add     %r3, %r3, %r5           ;\
+                                        \
+       bdnz    .Lphase2_loop           ;\
+
+#include "bcopy.S"

Added: head/lib/libc/powerpc64/string/memcpy.S
==============================================================================
--- /dev/null   00:00:00 1970   (empty, because file is newly added)
+++ head/lib/libc/powerpc64/string/memcpy.S     Wed Jan 15 20:25:52 2020        
(r356767)
@@ -0,0 +1,122 @@
+/*-
+ * Copyright (c) 2018 Instituto de Pesquisas Eldorado
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the author nor the names of its contributors may
+ *    be used to endorse or promote products derived from this software
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ */
+
+#include <machine/asm.h>
+__FBSDID("$FreeBSD$");
+
+/*
+ * Default (non-VSX) build: 16-byte blocks.  A file that includes this one
+ * with FN_NAME already defined must provide BLOCK_BITS itself.
+ */
+#ifndef FN_NAME
+#define FN_NAME                                __memcpy
+WEAK_REFERENCE(__memcpy, memcpy);
+#define BLOCK_BITS                     4
+#endif
+
+#define BLOCK_BYTES                    (1 << BLOCK_BITS)
+#define BLOCK_MASK                     (BLOCK_BYTES - 1)
+
+/*
+ * Forward-only memory copy.
+ *
+ * Registers on entry:
+ * r3: dst
+ * r4: src
+ * r5: len
+ *
+ * On return r3 holds the original dst.
+ *
+ * Steps:
+ *   1. copy single bytes until src is aligned to BLOCK_BYTES (at least one
+ *      byte is always copied here, even if src starts out aligned);
+ *   2. copy the whole blocks, with FN_COPY_LOOP if the including file
+ *      defines it, otherwise 16 bytes at a time with ld/std;
+ *   3. copy the remaining (len & BLOCK_MASK) bytes one at a time.
+ *
+ * Register use:
+ *   r7: bytes left for step 3
+ *   r8: saved dst
+ *   r0, r6, r9, ctr, cr0: scratch
+ */
+ENTRY(FN_NAME)
+       cmpdi   %r5, 0                  /* len == 0? nothing to do */
+       beqlr-
+
+       mr      %r8, %r3                /* save dst */
+
+       /* align src */
+.Lalignment_loop:
+       lbz     %r6, 0(%r4)
+       stb     %r6, 0(%r3)
+       addi    %r3, %r3, 1
+       addi    %r4, %r4, 1
+       addi    %r5, %r5, -1
+       cmpdi   %r5, 0
+       beq     .Lexit
+       andi.   %r0, %r4, BLOCK_MASK
+       bne     .Lalignment_loop
+
+       /* r7 = remaining, non-block, bytes */
+       andi.   %r7, %r5, BLOCK_MASK
+
+       /*
+        * Check if there are blocks of BLOCK_BYTES to be copied.
+        * r5 is non-zero here, so when no whole block is left r7 is
+        * non-zero and the byte loop below has work to do.
+        */
+       xor.    %r5, %r5, %r7
+       beq     .Lcopy_remaining_fix_index_byte
+
+#ifdef FN_COPY_LOOP
+FN_COPY_LOOP
+#else
+       /*
+        * Copy the first 16 bytes with plain ld/std; the loop below then
+        * uses the update forms (ldu/stdu) to step through the rest.
+        */
+       ld      %r6, 0(%r4)
+       ld      %r9, 8(%r4)
+       std     %r6, 0(%r3)
+       std     %r9, 8(%r3)
+       addi    %r5, %r5, -BLOCK_BYTES
+       /* compare with the immediate 0 (cmpd would compare with r0) */
+       cmpdi   %r5, 0
+       beq     .Lcopy_remaining_fix_index_word
+
+       srdi    %r5, %r5, BLOCK_BITS
+       mtctr   %r5
+.Lcopy_word:
+       ldu     %r6, 16(%r4)
+       ld      %r9,  8(%r4)
+       stdu    %r6, 16(%r3)
+       std     %r9,  8(%r3)
+       bdnz    .Lcopy_word
+
+.Lcopy_remaining_fix_index_word:
+       /* Check if there are remaining bytes */
+       cmpdi   %r7, 0
+       beq     .Lexit
+       /*
+        * r3/r4 point to the start of the last 16 bytes copied; move them
+        * to the last byte copied, as the byte loop pre-increments.
+        */
+       addi    %r3, %r3, BLOCK_MASK
+       addi    %r4, %r4, BLOCK_MASK
+       b       .Lcopy_remaining
+#endif
+
+.Lcopy_remaining_fix_index_byte:
+       /* r3/r4 point to the next byte to copy; step back for lbzu/stbu */
+       addi    %r4, %r4, -1
+       addi    %r3, %r3, -1
+
+       /* Copy remaining bytes */
+.Lcopy_remaining:
+       mtctr   %r7
+.Lcopy_remaining_loop:
+       lbzu    %r6, 1(%r4)
+       stbu    %r6, 1(%r3)
+       bdnz    .Lcopy_remaining_loop
+
+.Lexit:
+       /* Restore dst */
+       mr      %r3, %r8
+       blr
+
+END(FN_NAME)
+
+       .section .note.GNU-stack,"",%progbits
+

Added: head/lib/libc/powerpc64/string/memcpy_resolver.c
==============================================================================
--- /dev/null   00:00:00 1970   (empty, because file is newly added)
+++ head/lib/libc/powerpc64/string/memcpy_resolver.c    Wed Jan 15 20:25:52 
2020        (r356767)
@@ -0,0 +1,4 @@
+/* $FreeBSD$ */
+
+#define MEMCOPY
+#include "bcopy_resolver.c"

Added: head/lib/libc/powerpc64/string/memcpy_vsx.S
==============================================================================
--- /dev/null   00:00:00 1970   (empty, because file is newly added)
+++ head/lib/libc/powerpc64/string/memcpy_vsx.S Wed Jan 15 20:25:52 2020        
(r356767)
@@ -0,0 +1,65 @@
+/*-
+ * Copyright (c) 2018 Instituto de Pesquisas Eldorado
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the author nor the names of its contributors may
+ *    be used to endorse or promote products derived from this software
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#define FN_NAME                        __memcpy_vsx
+#define BLOCK_BITS             6
+
+/*
+ * VSX block-copy loop, expanded inside memcpy.S in place of its default
+ * ld/std loop.  Blocks are 64 bytes (BLOCK_BITS = 6).
+ *
+ * Inputs:
+ * r3: dst
+ * r4: src
+ * r5: bytes to copy (multiple of BLOCK_BYTES)
+ * r7: bytes left to copy after the blocks
+ *
+ * After the loop r3 and r4 point just past the copied blocks.  The macro
+ * branches to .Lexit when r7 is zero; otherwise execution falls through to
+ * the code that follows the expansion in memcpy.S.
+ *
+ * Modifies r3-r6, r9-r11, ctr, cr0 and vs6-vs9.
+ */
+#define FN_COPY_LOOP \
+       /* Load CTR with number of blocks */             \
+       srdi    %r5, %r5, BLOCK_BITS                    ;\
+       mtctr   %r5                                     ;\
+       /* Prepare indexes to load and store data */     \
+       li      %r6,  0                                 ;\
+       li      %r9,  16                                ;\
+       li      %r10, 32                                ;\
+       li      %r11, 48                                ;\
+.Lcopy_vsx_loop:                                        \
+       lxvd2x  %vs6, %r6,  %r4                         ;\
+       lxvd2x  %vs7, %r9,  %r4                         ;\
+       lxvd2x  %vs8, %r10, %r4                         ;\
+       lxvd2x  %vs9, %r11, %r4                         ;\
+       stxvd2x %vs6, %r6,  %r3                         ;\
+       stxvd2x %vs7, %r9,  %r3                         ;\
+       stxvd2x %vs8, %r10, %r3                         ;\
+       stxvd2x %vs9, %r11, %r3                         ;\
+                                                        \
+       addi    %r3, %r3, BLOCK_BYTES                   ;\
+       addi    %r4, %r4, BLOCK_BYTES                   ;\
+       bdnz    .Lcopy_vsx_loop                         ;\
+                                                        \
+       /* Check if there are remaining bytes; the */    \
+       /* compare is with the immediate 0, not r0 */    \
+       cmpdi   %r7, 0                                  ;\
+       beq             .Lexit                          ;\
+
+#include "memcpy.S"

Added: head/lib/libc/powerpc64/string/memmove.S
==============================================================================
--- /dev/null   00:00:00 1970   (empty, because file is newly added)
+++ head/lib/libc/powerpc64/string/memmove.S    Wed Jan 15 20:25:52 2020        
(r356767)
@@ -0,0 +1,4 @@
+/* $FreeBSD$ */
+
+#define MEMMOVE
+#include "bcopy.S"

Added: head/lib/libc/powerpc64/string/memmove_resolver.c
==============================================================================
--- /dev/null   00:00:00 1970   (empty, because file is newly added)
+++ head/lib/libc/powerpc64/string/memmove_resolver.c   Wed Jan 15 20:25:52 
2020        (r356767)
@@ -0,0 +1,4 @@
+/* $FreeBSD$ */
+
+#define MEMMOVE
+#include "bcopy_resolver.c"

Added: head/lib/libc/powerpc64/string/memmove_vsx.S
==============================================================================
--- /dev/null   00:00:00 1970   (empty, because file is newly added)
+++ head/lib/libc/powerpc64/string/memmove_vsx.S        Wed Jan 15 20:25:52 
2020        (r356767)
@@ -0,0 +1,5 @@
+/* $FreeBSD$ */
+
+#define MEMMOVE
+#define FN_NAME                __memmove_vsx
+#include "bcopy_vsx.S"
_______________________________________________
svn-src-head@freebsd.org mailing list
https://lists.freebsd.org/mailman/listinfo/svn-src-head
To unsubscribe, send any mail to "svn-src-head-unsubscr...@freebsd.org"

Reply via email to