The optimized memset uses the dc opcode, which causes problems when the
data cache is disabled. This patch adds a run-time check of whether the
cache is disabled and falls back to a very simple byte-wise memset
implementation in that case. Otherwise the optimized version is used.

Signed-off-by: Stefan Roese <s...@denx.de>

---

(no changes since v4)

Changes in v4:
- Use macros instead of register names, following the optimized code
- Add zero size check

Changes in v2:
- New patch

 arch/arm/lib/memset-arm64.S | 32 ++++++++++++++++++++++++++++++++
 1 file changed, 32 insertions(+)

diff --git a/arch/arm/lib/memset-arm64.S b/arch/arm/lib/memset-arm64.S
index 710f6f582cad..ee9f9a96cfe6 100644
--- a/arch/arm/lib/memset-arm64.S
+++ b/arch/arm/lib/memset-arm64.S
@@ -11,6 +11,7 @@
  *
  */
 
+#include <asm/macro.h>
 #include "asmdefs.h"
 
 #define dstin  x0
@@ -25,6 +26,37 @@ ENTRY (memset)
        PTR_ARG (0)
        SIZE_ARG (2)
 
+       /*
+        * The optimized memset uses the dc opcode, which causes problems
+        * when the cache is disabled. Let's check if the cache is disabled
+        * and use a very simple memset implementation in this case. Otherwise
+        * jump to the optimized version.
+        */
+       switch_el x6, 3f, 2f, 1f
+3:     mrs     x6, sctlr_el3
+       b       0f
+2:     mrs     x6, sctlr_el2
+       b       0f
+1:     mrs     x6, sctlr_el1
+0:
+       tst     x6, #CR_C
+       bne     9f
+
+       /*
+        * A very "simple" memset implementation without the use of the
+        * dc opcode. Can be run with caches disabled.
+        */
+       mov     x3, #0x0
+       cmp     count, x3       /* check for zero length */
+       beq     8f
+4:     strb    valw, [dstin, x3]
+       add     x3, x3, #0x1
+       cmp     count, x3
+       bne     4b
+8:     ret
+9:
+
+       /* Here the optimized memset version starts */
        dup     v0.16B, valw
        add     dstend, dstin, count
 
-- 
2.33.0

Reply via email to