Author: wma
Date: Mon Apr  4 07:11:33 2016
New Revision: 297537
URL: https://svnweb.freebsd.org/changeset/base/297537

Log:
  Add bzero.S to ARM64 machdep
  
  Add file missing from https://svnweb.freebsd.org/changeset/base/297536

Added:
  head/sys/arm64/arm64/bzero.S   (contents, props changed)

Added: head/sys/arm64/arm64/bzero.S
==============================================================================
--- /dev/null   00:00:00 1970   (empty, because file is newly added)
+++ head/sys/arm64/arm64/bzero.S        Mon Apr  4 07:11:33 2016        (r297537)
@@ -0,0 +1,206 @@
+/*-
+ * Copyright (C) 2016 Cavium Inc.
+ * All rights reserved.
+ *
+ * Developed by Semihalf.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <machine/asm.h>
+__FBSDID("$FreeBSD$");
+
+
+#include "assym.s"
+
+       /*
+        * void bzero(void *p, size_t size)
+        *
+        *  x0 - p
+        *  x1 - size
+        */
+ENTRY(bzero)
+       cbz     x1, ending
+
+       /*
+        * x5 is the number of cache lines to zero - it is calculated later
+        * and becomes non-zero if the buffer is long enough to be zeroed by
+        * cache lines (and if doing so is allowed).
+        * It has to be zeroed here before proceeding with buffers smaller
+        * than 16 bytes - otherwise x5 would never get calculated and would
+        * retain a random value.
+        * "normal" is used for buffers <= 16 bytes and to align the buffer
+        * to a cache line for buffers bigger than a cache line; a non-zero
+        * x5 after "normal" has completed indicates that "normal" was used
+        * to align the buffer to a cache line and that zeroing by cache
+        * lines will now be performed, with x5 holding the number of cache
+        * lines to loop through.
+        */
+       mov     x5, xzr
+
+       /* Cache-assisted zeroing is not used for buffers with size <= 16 */
+       cmp     x1, #0x10
+       b.le    normal
+
+       /*
+        * Load the size of the block that a dc zva call will zero.
+        * 0 means that the instruction is not allowed.
+        */
+       ldr     x7, =dczva_line_size
+       ldr     x7, [x7]
+       cbz     x7, normal
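+       /*
+        * Note: dczva_line_size is expected to be set up elsewhere in the
+        * MD code (presumably from DCZID_EL0, as part of r297536); this
+        * file only consumes it.
+        */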
+
+       /*
+        * The buffer must be at least as large as a cache line to use cache
+        * zeroing (and cache line aligned, but that is checked after the jump).
+        */
+       cmp     x1, x7
+       b.lt    normal
+
+       /*
+        * Calculate the number of bytes to the next cache-line-aligned
+        * address (x4) and the number of full cache lines (x5). x6 is the
+        * final address to zero.
+        */
+       sub     x2, x7, #0x01
+       mov     x3, -1
+       eor     x3, x3, x2
+       add     x4, x0, x2
+       and     x4, x4, x3
+       subs    x4, x4, x0
+       b.eq    normal
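+       /*
+        * Worked example (assuming a 64-byte dc zva block): with x0 = 0x1234,
+        * x2 = 0x3f and x3 = ~0x3f, so x4 = (0x1234 + 0x3f) & ~0x3f = 0x1240
+        * and x4 - x0 = 0x0c bytes are needed to reach the aligned address.
+        */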
+
+       /* Calculate number of "lines" in buffer */
+       sub     x5, x1, x4
+       rbit    x2, x7
+       clz     x2, x2
+       lsr     x5, x5, x2
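+       /*
+        * rbit + clz counts the trailing zero bits of x7, i.e. log2 of the
+        * block size (which is a power of two), so the shift above divides
+        * the remaining length by the block size. E.g. for a 64-byte block,
+        * x2 = 6 and a 200-byte remainder gives 200 >> 6 = 3 full blocks.
+        */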
+
+       /*
+        * If the number of cache lines is 0, zeroing by cache lines is not
+        * possible, so take the normal path.
+        */
+       cbz     x5, normal
+       /* x6 is final address to zero */
+       add     x6, x0, x1
+
+       /*
+        * We are here because x5 is non-zero, so "normal" will be used to
+        * align the buffer before cache zeroing. x4 holds the number of
+        * bytes needed for alignment.
+        */
+       mov     x1, x4
+
+       /* When jumping here: x0 holds pointer, x1 holds size */
+normal:
+       /*
+        * Get the buffer's offset from a 16-byte-aligned address; 0 means
+        * the pointer is already aligned.
+        */
+       ands    x2, x0, #0x0f
+       b.eq    aligned_to_16
+       /* Calculate the number of one-byte stores needed to reach an 8 byte aligned address. */
+       ands    x2, x2, #0x07
+       mov     x3, #0x08
+       sub     x2, x3, x2
+       /* x2 is number of bytes missing for alignment, x1 is buffer size */
+       cmp     x1, x2
+       csel    x2, x1, x2, le
+       sub     x1, x1, x2
+
+       /*
+        * The byte-by-byte loop zeroes enough bytes to align the pointer,
+        * but never more than "size" bytes.
+        */
+align:
+       strb    wzr, [x0], #0x01
+       subs    x2, x2, #0x01
+       b.ne    align
+
+       /* Now pointer is aligned to 8 bytes */
+       cmp     x1, #0x10
+       b.lt    lead_out
+       /*
+        * Check whether another 8-byte store is needed to reach a 16-byte
+        * aligned address and do it if so.
+        */
+       tbz     x0, #0x03, aligned_to_16
+       str     xzr, [x0], #0x08
+       sub     x1, x1, #0x08
+
+       /* When jumping here: x0 is a 16 byte aligned address, x1 is the size */
+aligned_to_16:
+       /* If size is less than 16 bytes, use lead_out to zero what remains */
+       cmp     x1, #0x10
+       b.lt    lead_out
+
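+       /* x2 = number of full 16-byte blocks (size / 16) to zero with paired stores */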
+       lsr     x2, x1, #0x04
+zero_by_16:
+       stp     xzr, xzr, [x0], #0x10
+       subs    x2, x2, #0x01
+       b.ne    zero_by_16
+
+       /*
+        * The lead-out requires the address to be aligned to 8 bytes. It is
+        * used to zero buffers with sizes < 16 and whatever cannot be zeroed
+        * by the zero_by_16 loop.
+        */
+       ands    x1, x1, #0x0f
+       b.eq    lead_out_end
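+       /*
+        * Each tbz below tests one bit of the remaining size and stores the
+        * matching amount. E.g. 13 remaining bytes (0b1101) result in one
+        * 8-byte, one 4-byte and one 1-byte store.
+        */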
+lead_out:
+       tbz     x1, #0x03, lead_out_dword
+       str     xzr, [x0], #0x08
+lead_out_dword:
+       tbz     x1, #0x02, lead_out_word
+       str     wzr, [x0], #0x04
+lead_out_word:
+       tbz     x1, #0x01, lead_out_byte
+       strh    wzr, [x0], #0x02
+lead_out_byte:
+       tbz     x1, #0x00, lead_out_end
+       strb    wzr, [x0], #0x01
+
+lead_out_end:
+       /*
+        * If x5 is non-zero, "normal" was used as a lead-in to align the
+        * buffer address to the cache line size.
+        */
+       cbz     x5, ending
+
+       /*
+        * Here x5 holds the number of lines to zero; x6 is the final address
+        * of the buffer; x0 is a cache-line-aligned pointer; x7 is the cache
+        * line size in bytes.
+        */
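+       /*
+        * Each dc zva zeroes one whole block of x7 bytes at the block-aligned
+        * address in x0, without issuing individual stores.
+        */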
+cache_line_zero:
+       dc      zva, x0
+       add     x0, x0, x7
+       subs    x5, x5, #0x01
+       b.ne    cache_line_zero
+
+       /* Need to zero remaining bytes? */
+       subs    x1, x6, x0
+       b.ne    normal
+
+ending:
+       ret
+
+END(bzero)
+