Re: [PATCH V9 13/21] csky: Library functions

2018-10-17 Thread Guo Ren
On Wed, Oct 17, 2018 at 05:24:36PM +0200, Arnd Bergmann wrote:
> On Tue, Oct 16, 2018 at 5:02 AM Guo Ren  wrote:
> 
> > +++ b/arch/csky/lib/delay.c
> > @@ -0,0 +1,39 @@
> > +// SPDX-License-Identifier: GPL-2.0
> > +// Copyright (C) 2018 Hangzhou C-SKY Microsystems co.,ltd.
> > +#include 
> > +#include 
> > +#include 
> > +#include 
> > +
> > +void __delay(unsigned long loops)
> > +{
> > +   asm volatile (
> > +   "mov r0, r0\n"
> > +   "1:declt %0\n"
> > +   "bf 1b"
> > +   : "=r"(loops)
> > +   : "0"(loops));
> > +}
> > +EXPORT_SYMBOL(__delay);
> > +
> > +void __const_udelay(unsigned long xloops)
> > +{
> > +   unsigned long long loops;
> > +
> > +   loops = (unsigned long long)xloops * loops_per_jiffy * HZ;
> > +
> > +   __delay(loops >> 32);
> > +}
> > +EXPORT_SYMBOL(__const_udelay);
> 
> I can't find any indication we discussed this part before. Can you
> explain (in the changelog and in a reply) if there is a reliable
> high-resolution clocksource available in the architecture? If you can
> read the elapsed time here instead of using the loops_per_jiffy
> estimate, that would make this more robust, as well as speed up the boot.
> 
> If you can't do that in general, that's fine, and you can add my
I'll add a TODO here: __delay() should use get_cycles(), and we haven't
implemented __udelay() or __ndelay() yet. We'll improve this next.

Best Regards
 Guo Ren


Re: [PATCH V9 13/21] csky: Library functions

2018-10-17 Thread Guo Ren
On Wed, Oct 17, 2018 at 05:24:36PM +0200, Arnd Bergmann wrote:
> On Tue, Oct 16, 2018 at 5:02 AM Guo Ren  wrote:
> 
> > +++ b/arch/csky/lib/delay.c
> > @@ -0,0 +1,39 @@
> > +// SPDX-License-Identifier: GPL-2.0
> > +// Copyright (C) 2018 Hangzhou C-SKY Microsystems co.,ltd.
> > +#include 
> > +#include 
> > +#include 
> > +#include 
> > +
> > +void __delay(unsigned long loops)
> > +{
> > +   asm volatile (
> > +   "mov r0, r0\n"
> > +   "1:declt %0\n"
> > +   "bf 1b"
> > +   : "=r"(loops)
> > +   : "0"(loops));
> > +}
> > +EXPORT_SYMBOL(__delay);
> > +
> > +void __const_udelay(unsigned long xloops)
> > +{
> > +   unsigned long long loops;
> > +
> > +   loops = (unsigned long long)xloops * loops_per_jiffy * HZ;
> > +
> > +   __delay(loops >> 32);
> > +}
> > +EXPORT_SYMBOL(__const_udelay);
> 
> I can't find any indication we discussed this part before. Can you
> explain (in the changelog and in a reply) if there is a reliable
> high-resolution clocksource available in the architecture? If you can
> read the elapsed time here instead of using the loops_per_jiffy
> estimate, that would make this more robust, as well as speed up the boot.
> 
> If you can't do that in general, that's fine, and you can add my
I'll add a TODO here: __delay() should use get_cycles(), and we haven't
implemented __udelay() or __ndelay() yet. We'll improve this next.

Best Regards
 Guo Ren


Re: [PATCH V9 13/21] csky: Library functions

2018-10-17 Thread Arnd Bergmann
On Tue, Oct 16, 2018 at 5:02 AM Guo Ren  wrote:

> +++ b/arch/csky/lib/delay.c
> @@ -0,0 +1,39 @@
> +// SPDX-License-Identifier: GPL-2.0
> +// Copyright (C) 2018 Hangzhou C-SKY Microsystems co.,ltd.
> +#include 
> +#include 
> +#include 
> +#include 
> +
> +void __delay(unsigned long loops)
> +{
> +   asm volatile (
> +   "mov r0, r0\n"
> +   "1:declt %0\n"
> +   "bf 1b"
> +   : "=r"(loops)
> +   : "0"(loops));
> +}
> +EXPORT_SYMBOL(__delay);
> +
> +void __const_udelay(unsigned long xloops)
> +{
> +   unsigned long long loops;
> +
> +   loops = (unsigned long long)xloops * loops_per_jiffy * HZ;
> +
> +   __delay(loops >> 32);
> +}
> +EXPORT_SYMBOL(__const_udelay);

I can't find any indication we discussed this part before. Can you
explain (in the changelog and in a reply) if there is a reliable
high-resolution clocksource
available in the architecture? If you can read the elapsed time here instead of
using the loops_per_jiffy estimate, that would make this more robust, as
well as speed up the boot.

If you can't do that in general, that's fine, and you can add my
Reviewed-by: Arnd Bergmann 


Re: [PATCH V9 13/21] csky: Library functions

2018-10-17 Thread Arnd Bergmann
On Tue, Oct 16, 2018 at 5:02 AM Guo Ren  wrote:

> +++ b/arch/csky/lib/delay.c
> @@ -0,0 +1,39 @@
> +// SPDX-License-Identifier: GPL-2.0
> +// Copyright (C) 2018 Hangzhou C-SKY Microsystems co.,ltd.
> +#include 
> +#include 
> +#include 
> +#include 
> +
> +void __delay(unsigned long loops)
> +{
> +   asm volatile (
> +   "mov r0, r0\n"
> +   "1:declt %0\n"
> +   "bf 1b"
> +   : "=r"(loops)
> +   : "0"(loops));
> +}
> +EXPORT_SYMBOL(__delay);
> +
> +void __const_udelay(unsigned long xloops)
> +{
> +   unsigned long long loops;
> +
> +   loops = (unsigned long long)xloops * loops_per_jiffy * HZ;
> +
> +   __delay(loops >> 32);
> +}
> +EXPORT_SYMBOL(__const_udelay);

I can't find any indication we discussed this part before. Can you
explain (in the changelog and in a reply) if there is a reliable
high-resolution clocksource
available in the architecture? If you can read the elapsed time here instead of
using the loops_per_jiffy estimate, that would make this more robust, as
well as speed up the boot.

If you can't do that in general, that's fine, and you can add my
Reviewed-by: Arnd Bergmann 


[PATCH V9 13/21] csky: Library functions

2018-10-15 Thread Guo Ren
This patch adds optimized string routines and some auxiliary code.

Signed-off-by: Chen Linfei 
Signed-off-by: Mao Han 
Signed-off-by: Guo Ren 
Cc: Arnd Bergmann 
---
Changelog:
 - Use bt instead of jbt in asm; jbt causes a relocation problem.
 - remove kernel/platform.c
---
---
 arch/csky/abiv1/bswapdi.c|  12 ++
 arch/csky/abiv1/bswapsi.c|  12 ++
 arch/csky/abiv1/inc/abi/string.h |  13 ++
 arch/csky/abiv1/memcpy.S | 347 +++
 arch/csky/abiv1/memset.c |  37 +
 arch/csky/abiv1/strksyms.c   |   7 +
 arch/csky/abiv2/inc/abi/string.h |  27 +++
 arch/csky/abiv2/memcmp.S | 152 +
 arch/csky/abiv2/memcpy.S | 110 +
 arch/csky/abiv2/memmove.S| 108 
 arch/csky/abiv2/memset.S |  83 ++
 arch/csky/abiv2/strcmp.S | 168 +++
 arch/csky/abiv2/strcpy.S | 123 ++
 arch/csky/abiv2/strksyms.c   |  12 ++
 arch/csky/abiv2/strlen.S |  97 +++
 arch/csky/abiv2/sysdep.h |  30 
 arch/csky/include/asm/string.h   |  13 ++
 arch/csky/kernel/power.c |  30 
 arch/csky/lib/delay.c|  39 +
 19 files changed, 1420 insertions(+)
 create mode 100644 arch/csky/abiv1/bswapdi.c
 create mode 100644 arch/csky/abiv1/bswapsi.c
 create mode 100644 arch/csky/abiv1/inc/abi/string.h
 create mode 100644 arch/csky/abiv1/memcpy.S
 create mode 100644 arch/csky/abiv1/memset.c
 create mode 100644 arch/csky/abiv1/strksyms.c
 create mode 100644 arch/csky/abiv2/inc/abi/string.h
 create mode 100644 arch/csky/abiv2/memcmp.S
 create mode 100644 arch/csky/abiv2/memcpy.S
 create mode 100644 arch/csky/abiv2/memmove.S
 create mode 100644 arch/csky/abiv2/memset.S
 create mode 100644 arch/csky/abiv2/strcmp.S
 create mode 100644 arch/csky/abiv2/strcpy.S
 create mode 100644 arch/csky/abiv2/strksyms.c
 create mode 100644 arch/csky/abiv2/strlen.S
 create mode 100644 arch/csky/abiv2/sysdep.h
 create mode 100644 arch/csky/include/asm/string.h
 create mode 100644 arch/csky/kernel/power.c
 create mode 100644 arch/csky/lib/delay.c

diff --git a/arch/csky/abiv1/bswapdi.c b/arch/csky/abiv1/bswapdi.c
new file mode 100644
index 000..f50a1d6
--- /dev/null
+++ b/arch/csky/abiv1/bswapdi.c
@@ -0,0 +1,12 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (C) 2018 Hangzhou C-SKY Microsystems co.,ltd.
+
+#include 
+#include 
+#include 
+
+unsigned long long notrace __bswapdi2(unsigned long long u)
+{
+   return ___constant_swab64(u);
+}
+EXPORT_SYMBOL(__bswapdi2);
diff --git a/arch/csky/abiv1/bswapsi.c b/arch/csky/abiv1/bswapsi.c
new file mode 100644
index 000..0f79182
--- /dev/null
+++ b/arch/csky/abiv1/bswapsi.c
@@ -0,0 +1,12 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (C) 2018 Hangzhou C-SKY Microsystems co.,ltd.
+
+#include 
+#include 
+#include 
+
+unsigned int notrace __bswapsi2(unsigned int u)
+{
+   return ___constant_swab32(u);
+}
+EXPORT_SYMBOL(__bswapsi2);
diff --git a/arch/csky/abiv1/inc/abi/string.h b/arch/csky/abiv1/inc/abi/string.h
new file mode 100644
index 000..5abe80b
--- /dev/null
+++ b/arch/csky/abiv1/inc/abi/string.h
@@ -0,0 +1,13 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+// Copyright (C) 2018 Hangzhou C-SKY Microsystems co.,ltd.
+
+#ifndef __ABI_CSKY_STRING_H
+#define __ABI_CSKY_STRING_H
+
+#define __HAVE_ARCH_MEMCPY
+extern void *memcpy(void *, const void *, __kernel_size_t);
+
+#define __HAVE_ARCH_MEMSET
+extern void *memset(void *, int, __kernel_size_t);
+
+#endif /* __ABI_CSKY_STRING_H */
diff --git a/arch/csky/abiv1/memcpy.S b/arch/csky/abiv1/memcpy.S
new file mode 100644
index 000..5078eb5
--- /dev/null
+++ b/arch/csky/abiv1/memcpy.S
@@ -0,0 +1,347 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+// Copyright (C) 2018 Hangzhou C-SKY Microsystems co.,ltd.
+
+#include 
+
+.macro GET_FRONT_BITS rx y
+#ifdef __cskyLE__
+   lsri\rx, \y
+#else
+   lsli\rx, \y
+#endif
+.endm
+
+.macro GET_AFTER_BITS rx y
+#ifdef __cskyLE__
+   lsli\rx, \y
+#else
+   lsri\rx, \y
+#endif
+.endm
+
+/* void *memcpy(void *dest, const void *src, size_t n); */
+ENTRY(memcpy)
+   mov r7, r2
+   cmplti  r4, 4
+   bt  .L_copy_by_byte
+   mov r6, r2
+   andir6, 3
+   cmpnei  r6, 0
+   jbt .L_dest_not_aligned
+   mov r6, r3
+   andir6, 3
+   cmpnei  r6, 0
+   jbt .L_dest_aligned_but_src_not_aligned
+.L0:
+   cmplti  r4, 16
+   jbt .L_aligned_and_len_less_16bytes
+   subisp, 8
+   stw r8, (sp, 0)
+.L_aligned_and_len_larger_16bytes:
+   ldw r1, (r3, 0)
+   ldw r5, (r3, 4)
+   ldw r8, (r3, 8)
+   stw r1, (r7, 0)
+   ldw r1, (r3, 12)
+   stw r5, (r7, 4)
+   stw r8, (r7, 8)
+   stw r1, (r7, 12)
+   subir4, 16
+   addir3, 16
+   addir7, 16
+   cmplti  r4, 16
+   jbf 

[PATCH V9 13/21] csky: Library functions

2018-10-15 Thread Guo Ren
This patch adds optimized string routines and some auxiliary code.

Signed-off-by: Chen Linfei 
Signed-off-by: Mao Han 
Signed-off-by: Guo Ren 
Cc: Arnd Bergmann 
---
Changelog:
 - Use bt instead of jbt in asm; jbt causes a relocation problem.
 - remove kernel/platform.c
---
---
 arch/csky/abiv1/bswapdi.c|  12 ++
 arch/csky/abiv1/bswapsi.c|  12 ++
 arch/csky/abiv1/inc/abi/string.h |  13 ++
 arch/csky/abiv1/memcpy.S | 347 +++
 arch/csky/abiv1/memset.c |  37 +
 arch/csky/abiv1/strksyms.c   |   7 +
 arch/csky/abiv2/inc/abi/string.h |  27 +++
 arch/csky/abiv2/memcmp.S | 152 +
 arch/csky/abiv2/memcpy.S | 110 +
 arch/csky/abiv2/memmove.S| 108 
 arch/csky/abiv2/memset.S |  83 ++
 arch/csky/abiv2/strcmp.S | 168 +++
 arch/csky/abiv2/strcpy.S | 123 ++
 arch/csky/abiv2/strksyms.c   |  12 ++
 arch/csky/abiv2/strlen.S |  97 +++
 arch/csky/abiv2/sysdep.h |  30 
 arch/csky/include/asm/string.h   |  13 ++
 arch/csky/kernel/power.c |  30 
 arch/csky/lib/delay.c|  39 +
 19 files changed, 1420 insertions(+)
 create mode 100644 arch/csky/abiv1/bswapdi.c
 create mode 100644 arch/csky/abiv1/bswapsi.c
 create mode 100644 arch/csky/abiv1/inc/abi/string.h
 create mode 100644 arch/csky/abiv1/memcpy.S
 create mode 100644 arch/csky/abiv1/memset.c
 create mode 100644 arch/csky/abiv1/strksyms.c
 create mode 100644 arch/csky/abiv2/inc/abi/string.h
 create mode 100644 arch/csky/abiv2/memcmp.S
 create mode 100644 arch/csky/abiv2/memcpy.S
 create mode 100644 arch/csky/abiv2/memmove.S
 create mode 100644 arch/csky/abiv2/memset.S
 create mode 100644 arch/csky/abiv2/strcmp.S
 create mode 100644 arch/csky/abiv2/strcpy.S
 create mode 100644 arch/csky/abiv2/strksyms.c
 create mode 100644 arch/csky/abiv2/strlen.S
 create mode 100644 arch/csky/abiv2/sysdep.h
 create mode 100644 arch/csky/include/asm/string.h
 create mode 100644 arch/csky/kernel/power.c
 create mode 100644 arch/csky/lib/delay.c

diff --git a/arch/csky/abiv1/bswapdi.c b/arch/csky/abiv1/bswapdi.c
new file mode 100644
index 000..f50a1d6
--- /dev/null
+++ b/arch/csky/abiv1/bswapdi.c
@@ -0,0 +1,12 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (C) 2018 Hangzhou C-SKY Microsystems co.,ltd.
+
+#include 
+#include 
+#include 
+
+unsigned long long notrace __bswapdi2(unsigned long long u)
+{
+   return ___constant_swab64(u);
+}
+EXPORT_SYMBOL(__bswapdi2);
diff --git a/arch/csky/abiv1/bswapsi.c b/arch/csky/abiv1/bswapsi.c
new file mode 100644
index 000..0f79182
--- /dev/null
+++ b/arch/csky/abiv1/bswapsi.c
@@ -0,0 +1,12 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (C) 2018 Hangzhou C-SKY Microsystems co.,ltd.
+
+#include 
+#include 
+#include 
+
+unsigned int notrace __bswapsi2(unsigned int u)
+{
+   return ___constant_swab32(u);
+}
+EXPORT_SYMBOL(__bswapsi2);
diff --git a/arch/csky/abiv1/inc/abi/string.h b/arch/csky/abiv1/inc/abi/string.h
new file mode 100644
index 000..5abe80b
--- /dev/null
+++ b/arch/csky/abiv1/inc/abi/string.h
@@ -0,0 +1,13 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+// Copyright (C) 2018 Hangzhou C-SKY Microsystems co.,ltd.
+
+#ifndef __ABI_CSKY_STRING_H
+#define __ABI_CSKY_STRING_H
+
+#define __HAVE_ARCH_MEMCPY
+extern void *memcpy(void *, const void *, __kernel_size_t);
+
+#define __HAVE_ARCH_MEMSET
+extern void *memset(void *, int, __kernel_size_t);
+
+#endif /* __ABI_CSKY_STRING_H */
diff --git a/arch/csky/abiv1/memcpy.S b/arch/csky/abiv1/memcpy.S
new file mode 100644
index 000..5078eb5
--- /dev/null
+++ b/arch/csky/abiv1/memcpy.S
@@ -0,0 +1,347 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+// Copyright (C) 2018 Hangzhou C-SKY Microsystems co.,ltd.
+
+#include 
+
+.macro GET_FRONT_BITS rx y
+#ifdef __cskyLE__
+   lsri\rx, \y
+#else
+   lsli\rx, \y
+#endif
+.endm
+
+.macro GET_AFTER_BITS rx y
+#ifdef __cskyLE__
+   lsli\rx, \y
+#else
+   lsri\rx, \y
+#endif
+.endm
+
+/* void *memcpy(void *dest, const void *src, size_t n); */
+ENTRY(memcpy)
+   mov r7, r2
+   cmplti  r4, 4
+   bt  .L_copy_by_byte
+   mov r6, r2
+   andir6, 3
+   cmpnei  r6, 0
+   jbt .L_dest_not_aligned
+   mov r6, r3
+   andir6, 3
+   cmpnei  r6, 0
+   jbt .L_dest_aligned_but_src_not_aligned
+.L0:
+   cmplti  r4, 16
+   jbt .L_aligned_and_len_less_16bytes
+   subisp, 8
+   stw r8, (sp, 0)
+.L_aligned_and_len_larger_16bytes:
+   ldw r1, (r3, 0)
+   ldw r5, (r3, 4)
+   ldw r8, (r3, 8)
+   stw r1, (r7, 0)
+   ldw r1, (r3, 12)
+   stw r5, (r7, 4)
+   stw r8, (r7, 8)
+   stw r1, (r7, 12)
+   subir4, 16
+   addir3, 16
+   addir7, 16
+   cmplti  r4, 16
+   jbf