Change in libosmocore[master]: libomsocoding: NEON viterbi acceleration

2020-08-06 Thread laforge
laforge has posted comments on this change. ( 
https://gerrit.osmocom.org/c/libosmocore/+/19372 )

Change subject: libomsocoding: NEON viterbi acceleration
..


Patch Set 3: Code-Review+2


--
To view, visit https://gerrit.osmocom.org/c/libosmocore/+/19372
To unsubscribe, or for help writing mail filters, visit 
https://gerrit.osmocom.org/settings

Gerrit-Project: libosmocore
Gerrit-Branch: master
Gerrit-Change-Id: I58ff2cb4ce3514f43390ff0a2121f81e6a4983b5
Gerrit-Change-Number: 19372
Gerrit-PatchSet: 3
Gerrit-Owner: Hoernchen 
Gerrit-Reviewer: Hoernchen 
Gerrit-Reviewer: Jenkins Builder
Gerrit-Reviewer: fixeria 
Gerrit-Reviewer: laforge 
Gerrit-CC: pespin 
Gerrit-Comment-Date: Thu, 06 Aug 2020 16:47:37 +
Gerrit-HasComments: No
Gerrit-Has-Labels: Yes
Gerrit-MessageType: comment


Change in libosmocore[master]: libomsocoding: NEON viterbi acceleration

2020-08-06 Thread laforge
laforge has submitted this change. ( 
https://gerrit.osmocom.org/c/libosmocore/+/19372 )

Change subject: libomsocoding: NEON viterbi acceleration
..

libomsocoding: NEON viterbi acceleration

configure flag required to enable this: --enable-neon

Although autodetection according to __ARM_NEON would work because this
is only defined if the fpu is neon neon-fp16 neon-vfpv3 neon-vfpv4
neon-fp-armv8 crypto-neon-fp-armv8 doing that would lead to a unknown
performance impact, so it needs to be enabled manually.

Speedup is about ~1.3-1.5 on a unspecified single core Cortex A9. This
requires handling a special case for RACH with len 14 which is far too
short for neon and would actually incur a performance penalty of 25%.

Related: OS#4585
Change-Id: I58ff2cb4ce3514f43390ff0a2121f81e6a4983b5
---
M configure.ac
M src/Makefile.am
M src/conv_acc.c
A src/conv_acc_neon.c
A src/conv_acc_neon_impl.h
5 files changed, 508 insertions(+), 0 deletions(-)

Approvals:
  laforge: Looks good to me, approved
  Jenkins Builder: Verified



diff --git a/configure.ac b/configure.ac
index f69c78d..2397b2f 100644
--- a/configure.ac
+++ b/configure.ac
@@ -378,6 +378,17 @@
AM_CONDITIONAL(HAVE_SSE4_1, false)
 fi

+AC_ARG_ENABLE(neon,
+   [AS_HELP_STRING(
+   [--enable-neon],
+   [Enable NEON support]
+   )],
+   [neon=$enableval], [neon="no"])
+AC_DEFINE(HAVE_NEON,,
+[Support ARM NEON instructions])
+AM_CONDITIONAL(HAVE_NEON, [test "x$neon" != "xno"])
+
+
 OSMO_AC_CODE_COVERAGE

 dnl Check if the compiler supports specified GCC's built-in function
diff --git a/src/Makefile.am b/src/Makefile.am
index 16119d9..be09784 100644
--- a/src/Makefile.am
+++ b/src/Makefile.am
@@ -48,6 +48,11 @@
 endif
 endif

+if HAVE_NEON
+libosmocore_la_SOURCES += conv_acc_neon.c
+# conv_acc_neon.lo : AM_CFLAGS += -mfpu=neon no, could as well be vfp with neon
+endif
+
 BUILT_SOURCES = crc8gen.c crc16gen.c crc32gen.c crc64gen.c
 EXTRA_DIST = conv_acc_sse_impl.h crcXXgen.c.tpl

diff --git a/src/conv_acc.c b/src/conv_acc.c
index c16e436..0f6f7ca 100644
--- a/src/conv_acc.c
+++ b/src/conv_acc.c
@@ -85,6 +85,11 @@
 void osmo_conv_sse_avx_vdec_free(int16_t *ptr);
 #endif

+#ifdef HAVE_NEON
+int16_t *osmo_conv_neon_vdec_malloc(size_t n);
+void osmo_conv_neon_vdec_free(int16_t *ptr);
+#endif
+
 /* Forward Metric Units */
 void osmo_conv_gen_metrics_k5_n2(const int8_t *seq, const int16_t *out,
int16_t *sums, int16_t *paths, int norm);
@@ -129,6 +134,21 @@
int16_t *sums, int16_t *paths, int norm);
 #endif

+#if defined(HAVE_NEON)
+void osmo_conv_neon_metrics_k5_n2(const int8_t *seq, const int16_t *out,
+   int16_t *sums, int16_t *paths, int norm);
+void osmo_conv_neon_metrics_k5_n3(const int8_t *seq, const int16_t *out,
+   int16_t *sums, int16_t *paths, int norm);
+void osmo_conv_neon_metrics_k5_n4(const int8_t *seq, const int16_t *out,
+   int16_t *sums, int16_t *paths, int norm);
+void osmo_conv_neon_metrics_k7_n2(const int8_t *seq, const int16_t *out,
+   int16_t *sums, int16_t *paths, int norm);
+void osmo_conv_neon_metrics_k7_n3(const int8_t *seq, const int16_t *out,
+   int16_t *sums, int16_t *paths, int norm);
+void osmo_conv_neon_metrics_k7_n4(const int8_t *seq, const int16_t *out,
+   int16_t *sums, int16_t *paths, int norm);
+#endif
+
 /* Trellis State
  * state - Internal lshift register value
  * prev  - Register values of previous 0 and 1 states
@@ -528,6 +548,12 @@
if (dec->k == 5) {
switch (dec->n) {
case 2:
+/* rach len 14 is too short for neon */
+#ifdef HAVE_NEON
+   if (code->len < 100)
+   dec->metric_func = osmo_conv_gen_metrics_k5_n2;
+   else
+#endif
dec->metric_func = osmo_conv_metrics_k5_n2;
break;
case 3:
@@ -681,6 +707,8 @@
} else {
INIT_POINTERS(gen);
}
+#elif defined(HAVE_NEON)
+   INIT_POINTERS(neon);
 #else
INIT_POINTERS(gen);
 #endif
diff --git a/src/conv_acc_neon.c b/src/conv_acc_neon.c
new file mode 100644
index 000..7244946
--- /dev/null
+++ b/src/conv_acc_neon.c
@@ -0,0 +1,110 @@
+/*! \file conv_acc_neon.c
+ * Accelerated Viterbi decoder implementation
+ * for architectures with only NEON available. */
+/*
+ * (C) 2020 by sysmocom - s.f.m.c. GmbH
+ * Author: Eric Wild
+ *
+ * All Rights Reserved
+ *
+ * SPDX-License-Identifier: GPL-2.0+
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A 

Change in libosmocore[master]: libomsocoding: NEON viterbi acceleration

2020-07-23 Thread Hoernchen
Hoernchen has posted comments on this change. ( 
https://gerrit.osmocom.org/c/libosmocore/+/19372 )

Change subject: libomsocoding: NEON viterbi acceleration
..


Patch Set 3:

(1 comment)

https://gerrit.osmocom.org/c/libosmocore/+/19372/3/src/conv_acc.c
File src/conv_acc.c:

https://gerrit.osmocom.org/c/libosmocore/+/19372/3/src/conv_acc.c@557
PS3, Line 557:  dec->metric_func = osmo_conv_metrics_k5_n2;
> wrong indentation not addressed.
it is not wrong, it is the reasonable approach here because your proposed "fix" 
suddenly introduces a new scope around the assignment in case NEON is not 
defined which clearly is not a preferrable choice just to make something "look 
nice" because it actually impacts the code itself and not merely cosmetic things



--
To view, visit https://gerrit.osmocom.org/c/libosmocore/+/19372
To unsubscribe, or for help writing mail filters, visit 
https://gerrit.osmocom.org/settings

Gerrit-Project: libosmocore
Gerrit-Branch: master
Gerrit-Change-Id: I58ff2cb4ce3514f43390ff0a2121f81e6a4983b5
Gerrit-Change-Number: 19372
Gerrit-PatchSet: 3
Gerrit-Owner: Hoernchen 
Gerrit-Reviewer: Hoernchen 
Gerrit-Reviewer: Jenkins Builder
Gerrit-Reviewer: fixeria 
Gerrit-Reviewer: laforge 
Gerrit-CC: pespin 
Gerrit-Comment-Date: Thu, 23 Jul 2020 14:35:29 +
Gerrit-HasComments: Yes
Gerrit-Has-Labels: No
Comment-In-Reply-To: pespin 
Gerrit-MessageType: comment


Change in libosmocore[master]: libomsocoding: NEON viterbi acceleration

2020-07-23 Thread Hoernchen
Hoernchen has posted comments on this change. ( 
https://gerrit.osmocom.org/c/libosmocore/+/19372 )

Change subject: libomsocoding: NEON viterbi acceleration
..


Patch Set 3:

(1 comment)

https://gerrit.osmocom.org/c/libosmocore/+/19372/3//COMMIT_MSG
Commit Message:

https://gerrit.osmocom.org/c/libosmocore/+/19372/3//COMMIT_MSG@14
PS3, Line 14: performance impact, so it needs to be enabled manually.
> Please put in here the cortex-a8 situation like Harald suggested
The fpu feature situation is far more severe.



--
To view, visit https://gerrit.osmocom.org/c/libosmocore/+/19372
To unsubscribe, or for help writing mail filters, visit 
https://gerrit.osmocom.org/settings

Gerrit-Project: libosmocore
Gerrit-Branch: master
Gerrit-Change-Id: I58ff2cb4ce3514f43390ff0a2121f81e6a4983b5
Gerrit-Change-Number: 19372
Gerrit-PatchSet: 3
Gerrit-Owner: Hoernchen 
Gerrit-Reviewer: Hoernchen 
Gerrit-Reviewer: Jenkins Builder
Gerrit-Reviewer: fixeria 
Gerrit-Reviewer: laforge 
Gerrit-CC: pespin 
Gerrit-Comment-Date: Thu, 23 Jul 2020 14:32:05 +
Gerrit-HasComments: Yes
Gerrit-Has-Labels: No
Comment-In-Reply-To: pespin 
Gerrit-MessageType: comment


Change in libosmocore[master]: libomsocoding: NEON viterbi acceleration

2020-07-23 Thread pespin
pespin has posted comments on this change. ( 
https://gerrit.osmocom.org/c/libosmocore/+/19372 )

Change subject: libomsocoding: NEON viterbi acceleration
..


Patch Set 3:

(3 comments)

https://gerrit.osmocom.org/c/libosmocore/+/19372/3//COMMIT_MSG
Commit Message:

https://gerrit.osmocom.org/c/libosmocore/+/19372/3//COMMIT_MSG@14
PS3, Line 14: performance impact, so it needs to be enabled manually.
Please put in here the cortex-a8 situation like Harald suggested


https://gerrit.osmocom.org/c/libosmocore/+/19372/3/src/conv_acc.c
File src/conv_acc.c:

https://gerrit.osmocom.org/c/libosmocore/+/19372/3/src/conv_acc.c@557
PS3, Line 557:  dec->metric_func = osmo_conv_metrics_k5_n2;
wrong indentation not addressed.


https://gerrit.osmocom.org/c/libosmocore/+/19372/1/src/conv_acc_neon_impl.h
File src/conv_acc_neon_impl.h:

https://gerrit.osmocom.org/c/libosmocore/+/19372/1/src/conv_acc_neon_impl.h@26
PS1, Line 26: /* Some distributions (notably Alpine Linux) for some strange 
reason
> This is a copy from the sse files and I see no reason to question this or 
> waste time tracking it dow […]
Ack



--
To view, visit https://gerrit.osmocom.org/c/libosmocore/+/19372
To unsubscribe, or for help writing mail filters, visit 
https://gerrit.osmocom.org/settings

Gerrit-Project: libosmocore
Gerrit-Branch: master
Gerrit-Change-Id: I58ff2cb4ce3514f43390ff0a2121f81e6a4983b5
Gerrit-Change-Number: 19372
Gerrit-PatchSet: 3
Gerrit-Owner: Hoernchen 
Gerrit-Reviewer: Hoernchen 
Gerrit-Reviewer: Jenkins Builder
Gerrit-Reviewer: fixeria 
Gerrit-Reviewer: laforge 
Gerrit-CC: pespin 
Gerrit-Comment-Date: Thu, 23 Jul 2020 14:26:47 +
Gerrit-HasComments: Yes
Gerrit-Has-Labels: No
Comment-In-Reply-To: pespin 
Comment-In-Reply-To: Hoernchen 
Gerrit-MessageType: comment


Change in libosmocore[master]: libomsocoding: NEON viterbi acceleration

2020-07-23 Thread Hoernchen
Hello fixeria, laforge, Jenkins Builder,

I'd like you to reexamine a change. Please visit

https://gerrit.osmocom.org/c/libosmocore/+/19372

to look at the new patch set (#3).

Change subject: libomsocoding: NEON viterbi acceleration
..

libomsocoding: NEON viterbi acceleration

configure flag required to enable this: --enable-neon

Although autodetection according to __ARM_NEON would work because this
is only defined if the fpu is neon neon-fp16 neon-vfpv3 neon-vfpv4
neon-fp-armv8 crypto-neon-fp-armv8 doing that would lead to a unknown
performance impact, so it needs to be enabled manually.

Speedup is about ~1.3-1.5 on a unspecified single core Cortex A9. This
requires handling a special case for RACH with len 14 which is far too
short for neon and would actually incur a performance penalty of 25%.

Related: OS#4585
Change-Id: I58ff2cb4ce3514f43390ff0a2121f81e6a4983b5
---
M configure.ac
M src/Makefile.am
M src/conv_acc.c
A src/conv_acc_neon.c
A src/conv_acc_neon_impl.h
5 files changed, 508 insertions(+), 0 deletions(-)


  git pull ssh://gerrit.osmocom.org:29418/libosmocore refs/changes/72/19372/3
--
To view, visit https://gerrit.osmocom.org/c/libosmocore/+/19372
To unsubscribe, or for help writing mail filters, visit 
https://gerrit.osmocom.org/settings

Gerrit-Project: libosmocore
Gerrit-Branch: master
Gerrit-Change-Id: I58ff2cb4ce3514f43390ff0a2121f81e6a4983b5
Gerrit-Change-Number: 19372
Gerrit-PatchSet: 3
Gerrit-Owner: Hoernchen 
Gerrit-Reviewer: Hoernchen 
Gerrit-Reviewer: Jenkins Builder
Gerrit-Reviewer: fixeria 
Gerrit-Reviewer: laforge 
Gerrit-CC: pespin 
Gerrit-MessageType: newpatchset


Change in libosmocore[master]: libomsocoding: NEON viterbi acceleration

2020-07-23 Thread Hoernchen
Hello fixeria, laforge, Jenkins Builder,

I'd like you to reexamine a change. Please visit

https://gerrit.osmocom.org/c/libosmocore/+/19372

to look at the new patch set (#2).

Change subject: libomsocoding: NEON viterbi acceleration
..

libomsocoding: NEON viterbi acceleration

configure flag required to enable this: --enable-neon

Although autodetection according to __ARM_NEON would work because this
is only defined if the fpu is neon neon-fp16 neon-vfpv3 neon-vfpv4
neon-fp-armv8 crypto-neon-fp-armv8 doing that would lead to a unknown
performance impact, so it needs to be enabled manually.

Speedup is about ~1.3-1.5 on a unspecified single core Cortex A9. This
requires handling a special case for RACH with len 14 which is far too
short for neon and would actually incur a performance penalty of 25%.

Change-Id: I58ff2cb4ce3514f43390ff0a2121f81e6a4983b5
---
M configure.ac
M src/Makefile.am
M src/conv_acc.c
A src/conv_acc_neon.c
A src/conv_acc_neon_impl.h
5 files changed, 508 insertions(+), 0 deletions(-)


  git pull ssh://gerrit.osmocom.org:29418/libosmocore refs/changes/72/19372/2
-- 
To view, visit https://gerrit.osmocom.org/c/libosmocore/+/19372
To unsubscribe, or for help writing mail filters, visit 
https://gerrit.osmocom.org/settings

Gerrit-Project: libosmocore
Gerrit-Branch: master
Gerrit-Change-Id: I58ff2cb4ce3514f43390ff0a2121f81e6a4983b5
Gerrit-Change-Number: 19372
Gerrit-PatchSet: 2
Gerrit-Owner: Hoernchen 
Gerrit-Reviewer: Hoernchen 
Gerrit-Reviewer: Jenkins Builder
Gerrit-Reviewer: fixeria 
Gerrit-Reviewer: laforge 
Gerrit-CC: pespin 
Gerrit-MessageType: newpatchset


Change in libosmocore[master]: libomsocoding: NEON viterbi acceleration

2020-07-23 Thread Hoernchen
Hoernchen has posted comments on this change. ( 
https://gerrit.osmocom.org/c/libosmocore/+/19372 )

Change subject: libomsocoding: NEON viterbi acceleration
..


Patch Set 1:

(1 comment)

https://gerrit.osmocom.org/c/libosmocore/+/19372/1//COMMIT_MSG
Commit Message:

https://gerrit.osmocom.org/c/libosmocore/+/19372/1//COMMIT_MSG@11
PS1, Line 11: Although autodetection according to __ARM_NEON would work because 
this
: is only defined if the fpu is neon neon-fp16 neon-vfpv3 eon-vfpv4
: neon-fp-armv8 crypto-neon-fp-armv8 doing that would lead to a 
unknown
: performance impact, so it needs to be enabled manually.
> thanks for explaining the Cortex-A8 situation. […]
And there is the case where you have to compile with neon to get neon at 
runtime for a specific fpu model, then detect the neon cpu feature at run time, 
but the target system might actually be compiled for a different fpu layout/"no 
fpu" soft float model that is not related to the cpu neon feature.



--
To view, visit https://gerrit.osmocom.org/c/libosmocore/+/19372
To unsubscribe, or for help writing mail filters, visit 
https://gerrit.osmocom.org/settings

Gerrit-Project: libosmocore
Gerrit-Branch: master
Gerrit-Change-Id: I58ff2cb4ce3514f43390ff0a2121f81e6a4983b5
Gerrit-Change-Number: 19372
Gerrit-PatchSet: 1
Gerrit-Owner: Hoernchen 
Gerrit-Reviewer: Hoernchen 
Gerrit-Reviewer: Jenkins Builder
Gerrit-Reviewer: fixeria 
Gerrit-Reviewer: laforge 
Gerrit-CC: pespin 
Gerrit-Comment-Date: Thu, 23 Jul 2020 13:07:26 +
Gerrit-HasComments: Yes
Gerrit-Has-Labels: No
Comment-In-Reply-To: laforge 
Comment-In-Reply-To: Hoernchen 
Gerrit-MessageType: comment


Change in libosmocore[master]: libomsocoding: NEON viterbi acceleration

2020-07-23 Thread laforge
laforge has posted comments on this change. ( 
https://gerrit.osmocom.org/c/libosmocore/+/19372 )

Change subject: libomsocoding: NEON viterbi acceleration
..


Patch Set 1:

(1 comment)

https://gerrit.osmocom.org/c/libosmocore/+/19372/1//COMMIT_MSG
Commit Message:

https://gerrit.osmocom.org/c/libosmocore/+/19372/1//COMMIT_MSG@11
PS1, Line 11: Although autodetection according to __ARM_NEON would work because 
this
: is only defined if the fpu is neon neon-fp16 neon-vfpv3 eon-vfpv4
: neon-fp-armv8 crypto-neon-fp-armv8 doing that would lead to a 
unknown
: performance impact, so it needs to be enabled manually.
> SS(S)E/AVX is a cpu feature set that differs depending on bits provided by 
> the cpu in a reg (and the […]
thanks for explaining the Cortex-A8 situation. It probably deserves to be 
mentioned here as a concrete example why we don't enable it by default.

After that explanation, I'm happy with your patch (minus the copyright notice).



--
To view, visit https://gerrit.osmocom.org/c/libosmocore/+/19372
To unsubscribe, or for help writing mail filters, visit 
https://gerrit.osmocom.org/settings

Gerrit-Project: libosmocore
Gerrit-Branch: master
Gerrit-Change-Id: I58ff2cb4ce3514f43390ff0a2121f81e6a4983b5
Gerrit-Change-Number: 19372
Gerrit-PatchSet: 1
Gerrit-Owner: Hoernchen 
Gerrit-Reviewer: Hoernchen 
Gerrit-Reviewer: Jenkins Builder
Gerrit-Reviewer: fixeria 
Gerrit-Reviewer: laforge 
Gerrit-CC: pespin 
Gerrit-Comment-Date: Thu, 23 Jul 2020 12:52:24 +
Gerrit-HasComments: Yes
Gerrit-Has-Labels: No
Comment-In-Reply-To: laforge 
Comment-In-Reply-To: Hoernchen 
Gerrit-MessageType: comment


Change in libosmocore[master]: libomsocoding: NEON viterbi acceleration

2020-07-23 Thread Hoernchen
Hoernchen has posted comments on this change. ( 
https://gerrit.osmocom.org/c/libosmocore/+/19372 )

Change subject: libomsocoding: NEON viterbi acceleration
..


Patch Set 1:

(5 comments)

https://gerrit.osmocom.org/c/libosmocore/+/19372/1//COMMIT_MSG
Commit Message:

https://gerrit.osmocom.org/c/libosmocore/+/19372/1//COMMIT_MSG@7
PS1, Line 7: libomsocoding
> Actually, it's not really libosmocoding. […]
I know, but the user of this code is libosmocoding, so this speds up 
libosmocoding, and therefore provides a more precise description of what now 
has neon support.


https://gerrit.osmocom.org/c/libosmocore/+/19372/1//COMMIT_MSG@14
PS1, Line 14: performance impact, so it needs to be enabled manually.
> I don't get what do you mean here. […]
Just like the special rach case neon can be bad, i.e. on a cortex a8 with 
in-order dual issue pipeline neon performance is abysmal.


https://gerrit.osmocom.org/c/libosmocore/+/19372/1//COMMIT_MSG@11
PS1, Line 11: Although autodetection according to __ARM_NEON would work because 
this
: is only defined if the fpu is neon neon-fp16 neon-vfpv3 eon-vfpv4
: neon-fp-armv8 crypto-neon-fp-armv8 doing that would lead to a 
unknown
: performance impact, so it needs to be enabled manually.
> I don't like this kind of approach. […]
SS(S)E/AVX is a cpu feature set that differs depending on bits provided by the 
cpu in a reg (and the knowledge that x64 means at least SSE2), NEON is a 
compile time target specific fpu and fp api configuration that is eternally set 
in stone at compile time. Detection would therefore happen at compile time.

Mixing neon and the fpu is bad, as is neon on a dual issue in-order cortex a8, 
so there are valid reason why it should not just be enabled.

That being said it's the users or devices fault, I'm fine with just turning it 
on.


https://gerrit.osmocom.org/c/libosmocore/+/19372/1/src/Makefile.am
File src/Makefile.am:

https://gerrit.osmocom.org/c/libosmocore/+/19372/1/src/Makefile.am@53
PS1, Line 53: no, could as well be vfp with neon
> Is it a comment or a part of AM_CFLAGS?
Just a reminder why adding explicit flags for neon instead or reyling upon the 
cflags provided by the build environment is a bad idea.


https://gerrit.osmocom.org/c/libosmocore/+/19372/1/src/conv_acc_neon_impl.h
File src/conv_acc_neon_impl.h:

https://gerrit.osmocom.org/c/libosmocore/+/19372/1/src/conv_acc_neon_impl.h@26
PS1, Line 26: /* Some distributions (notably Alpine Linux) for some strange 
reason
> In my linux (ArchLinux) __always_inline is defined in 
> /usr/include/linux/stddef.h. […]
This is a copy from the sse files and I see no reason to question this or waste 
time tracking it down..



--
To view, visit https://gerrit.osmocom.org/c/libosmocore/+/19372
To unsubscribe, or for help writing mail filters, visit 
https://gerrit.osmocom.org/settings

Gerrit-Project: libosmocore
Gerrit-Branch: master
Gerrit-Change-Id: I58ff2cb4ce3514f43390ff0a2121f81e6a4983b5
Gerrit-Change-Number: 19372
Gerrit-PatchSet: 1
Gerrit-Owner: Hoernchen 
Gerrit-Reviewer: Hoernchen 
Gerrit-Reviewer: Jenkins Builder
Gerrit-Reviewer: fixeria 
Gerrit-Reviewer: laforge 
Gerrit-CC: pespin 
Gerrit-Comment-Date: Thu, 23 Jul 2020 10:43:40 +
Gerrit-HasComments: Yes
Gerrit-Has-Labels: No
Comment-In-Reply-To: fixeria 
Comment-In-Reply-To: pespin 
Comment-In-Reply-To: laforge 
Gerrit-MessageType: comment


Change in libosmocore[master]: libomsocoding: NEON viterbi acceleration

2020-07-23 Thread pespin
pespin has posted comments on this change. ( 
https://gerrit.osmocom.org/c/libosmocore/+/19372 )

Change subject: libomsocoding: NEON viterbi acceleration
..


Patch Set 1:

(6 comments)

https://gerrit.osmocom.org/c/libosmocore/+/19372/1//COMMIT_MSG
Commit Message:

https://gerrit.osmocom.org/c/libosmocore/+/19372/1//COMMIT_MSG@9
PS1, Line 9: configure flag required  to enable this: --enable-neon
double space "required  to"


https://gerrit.osmocom.org/c/libosmocore/+/19372/1//COMMIT_MSG@12
PS1, Line 12: is only defined if the fpu is neon neon-fp16 neon-vfpv3 eon-vfpv4
type: missing n in nenon.


https://gerrit.osmocom.org/c/libosmocore/+/19372/1//COMMIT_MSG@14
PS1, Line 14: performance impact, so it needs to be enabled manually.
I don't get what do you mean here. What do you mean with "unknown performance 
impact" here, why?


https://gerrit.osmocom.org/c/libosmocore/+/19372/1/configure.ac
File configure.ac:

https://gerrit.osmocom.org/c/libosmocore/+/19372/1/configure.ac@388
PS1, Line 388: [Support ARM NEON instructions])
I would say this flag is more "Enable neon instruction specific optimizations", 
because you can still disable it regardless of your target supporting neon or 
not.


https://gerrit.osmocom.org/c/libosmocore/+/19372/1/src/conv_acc.c
File src/conv_acc.c:

https://gerrit.osmocom.org/c/libosmocore/+/19372/1/src/conv_acc.c@557
PS1, Line 557:  dec->metric_func = osmo_conv_metrics_k5_n2;
wrong indentation.

Probably rather:
#ifdef HAVE_NEON
if (code->len < 100) {
dec->metric_func = osmo_conv_gen_metrics_k5_n2;
} else
#endif
{
dec->metric_func = osmo_conv_metrics_k5_n2;
}


https://gerrit.osmocom.org/c/libosmocore/+/19372/1/src/conv_acc_neon_impl.h
File src/conv_acc_neon_impl.h:

https://gerrit.osmocom.org/c/libosmocore/+/19372/1/src/conv_acc_neon_impl.h@26
PS1, Line 26: /* Some distributions (notably Alpine Linux) for some strange 
reason
In my linux (ArchLinux) __always_inline is defined in 
/usr/include/linux/stddef.h. Perhaps you are missing to include a header 
defining it?



--
To view, visit https://gerrit.osmocom.org/c/libosmocore/+/19372
To unsubscribe, or for help writing mail filters, visit 
https://gerrit.osmocom.org/settings

Gerrit-Project: libosmocore
Gerrit-Branch: master
Gerrit-Change-Id: I58ff2cb4ce3514f43390ff0a2121f81e6a4983b5
Gerrit-Change-Number: 19372
Gerrit-PatchSet: 1
Gerrit-Owner: Hoernchen 
Gerrit-Reviewer: Jenkins Builder
Gerrit-Reviewer: fixeria 
Gerrit-Reviewer: laforge 
Gerrit-CC: pespin 
Gerrit-Comment-Date: Thu, 23 Jul 2020 08:39:39 +
Gerrit-HasComments: Yes
Gerrit-Has-Labels: No
Gerrit-MessageType: comment


Change in libosmocore[master]: libomsocoding: NEON viterbi acceleration

2020-07-23 Thread laforge
laforge has posted comments on this change. ( 
https://gerrit.osmocom.org/c/libosmocore/+/19372 )

Change subject: libomsocoding: NEON viterbi acceleration
..


Patch Set 1: Code-Review-1

(3 comments)

-1 just for the legality of the copyright notice. the other comment is not 
worth a -1.

https://gerrit.osmocom.org/c/libosmocore/+/19372/1//COMMIT_MSG
Commit Message:

https://gerrit.osmocom.org/c/libosmocore/+/19372/1//COMMIT_MSG@11
PS1, Line 11: Although autodetection according to __ARM_NEON would work because 
this
: is only defined if the fpu is neon neon-fp16 neon-vfpv3 eon-vfpv4
: neon-fp-armv8 crypto-neon-fp-armv8 doing that would lead to a 
unknown
: performance impact, so it needs to be enabled manually.
I don't like this kind of approach.  We have auto-detection for x86 SIMD 
instructions, and we should try to align with that for NEON.  The detection 
would only have to be done once at startup, right?  Projects like ffmpeg are 
doing run-time detection of CPU features for a very long time.

So without even knowing how bad the impact would be, I'm not sure we should 
disable it.

The only question would be whether or not we should enable it by default in all 
our builds anyway.  I'm not sure who is running osmo-bts-trx on an ARM without 
NEON support.  We have plenty of osmo-bts-sysmo on old cores, probably also 
-octphy. But we've never heard of an osmo-bts-trx user who doesn't have a 
modern, NEON enabled core, AFAICT.


https://gerrit.osmocom.org/c/libosmocore/+/19372/1/src/conv_acc_neon.c
File src/conv_acc_neon.c:

https://gerrit.osmocom.org/c/libosmocore/+/19372/1/src/conv_acc_neon.c@5
PS1, Line 5: Copyright (C) 2020 Eric Wild 
erm, coypright by sysmocom, Author is you. At least as long as it was done 
during work hours :P


https://gerrit.osmocom.org/c/libosmocore/+/19372/1/src/conv_acc_neon_impl.h
File src/conv_acc_neon_impl.h:

https://gerrit.osmocom.org/c/libosmocore/+/19372/1/src/conv_acc_neon_impl.h@5
PS1, Line 5:
same here of course



--
To view, visit https://gerrit.osmocom.org/c/libosmocore/+/19372
To unsubscribe, or for help writing mail filters, visit 
https://gerrit.osmocom.org/settings

Gerrit-Project: libosmocore
Gerrit-Branch: master
Gerrit-Change-Id: I58ff2cb4ce3514f43390ff0a2121f81e6a4983b5
Gerrit-Change-Number: 19372
Gerrit-PatchSet: 1
Gerrit-Owner: Hoernchen 
Gerrit-Reviewer: Jenkins Builder
Gerrit-Reviewer: fixeria 
Gerrit-Reviewer: laforge 
Gerrit-Comment-Date: Thu, 23 Jul 2020 08:26:09 +
Gerrit-HasComments: Yes
Gerrit-Has-Labels: Yes
Gerrit-MessageType: comment


Change in libosmocore[master]: libomsocoding: NEON viterbi acceleration

2020-07-23 Thread fixeria
fixeria has posted comments on this change. ( 
https://gerrit.osmocom.org/c/libosmocore/+/19372 )

Change subject: libomsocoding: NEON viterbi acceleration
..


Patch Set 1:

(1 comment)

https://gerrit.osmocom.org/c/libosmocore/+/19372/1//COMMIT_MSG
Commit Message:

https://gerrit.osmocom.org/c/libosmocore/+/19372/1//COMMIT_MSG@7
PS1, Line 7: libomsocoding
Actually, it's not really libosmocoding. Due to historical reasons, the Viterbi 
implementation is still in libosmocore.



--
To view, visit https://gerrit.osmocom.org/c/libosmocore/+/19372
To unsubscribe, or for help writing mail filters, visit 
https://gerrit.osmocom.org/settings

Gerrit-Project: libosmocore
Gerrit-Branch: master
Gerrit-Change-Id: I58ff2cb4ce3514f43390ff0a2121f81e6a4983b5
Gerrit-Change-Number: 19372
Gerrit-PatchSet: 1
Gerrit-Owner: Hoernchen 
Gerrit-Reviewer: Jenkins Builder
Gerrit-Reviewer: fixeria 
Gerrit-Comment-Date: Thu, 23 Jul 2020 07:34:56 +
Gerrit-HasComments: Yes
Gerrit-Has-Labels: No
Gerrit-MessageType: comment


Change in libosmocore[master]: libomsocoding: NEON viterbi acceleration

2020-07-23 Thread fixeria
fixeria has posted comments on this change. ( 
https://gerrit.osmocom.org/c/libosmocore/+/19372 )

Change subject: libomsocoding: NEON viterbi acceleration
..


Patch Set 1: Code-Review+1

(2 comments)

https://gerrit.osmocom.org/c/libosmocore/+/19372/1/src/Makefile.am
File src/Makefile.am:

https://gerrit.osmocom.org/c/libosmocore/+/19372/1/src/Makefile.am@53
PS1, Line 53: no, could as well be vfp with neon
Is it a comment or a part of AM_CFLAGS?


https://gerrit.osmocom.org/c/libosmocore/+/19372/1/src/conv_acc.c
File src/conv_acc.c:

https://gerrit.osmocom.org/c/libosmocore/+/19372/1/src/conv_acc.c@553
PS1, Line 553: if(code->len < 100)
If is not a function, missing space.



--
To view, visit https://gerrit.osmocom.org/c/libosmocore/+/19372
To unsubscribe, or for help writing mail filters, visit 
https://gerrit.osmocom.org/settings

Gerrit-Project: libosmocore
Gerrit-Branch: master
Gerrit-Change-Id: I58ff2cb4ce3514f43390ff0a2121f81e6a4983b5
Gerrit-Change-Number: 19372
Gerrit-PatchSet: 1
Gerrit-Owner: Hoernchen 
Gerrit-Reviewer: Jenkins Builder
Gerrit-Reviewer: fixeria 
Gerrit-Comment-Date: Thu, 23 Jul 2020 07:31:56 +
Gerrit-HasComments: Yes
Gerrit-Has-Labels: Yes
Gerrit-MessageType: comment


Change in libosmocore[master]: libomsocoding: NEON viterbi acceleration

2020-07-22 Thread Hoernchen
Hoernchen has uploaded this change for review. ( 
https://gerrit.osmocom.org/c/libosmocore/+/19372 )


Change subject: libomsocoding: NEON viterbi acceleration
..

libomsocoding: NEON viterbi acceleration

configure flag required  to enable this: --enable-neon

Although autodetection according to __ARM_NEON would work because this
is only defined if the fpu is neon neon-fp16 neon-vfpv3 eon-vfpv4
neon-fp-armv8 crypto-neon-fp-armv8 doing that would lead to a unknown
performance impact, so it needs to be enabled manually.

Speedup is about ~1.3-1.5 on a unspecified single core Cortex A9. This
requires handling a special case for RACH with len 14 which is far too
short for neon and would actually incur a performance penalty of 25%.

Change-Id: I58ff2cb4ce3514f43390ff0a2121f81e6a4983b5
---
M configure.ac
M src/Makefile.am
M src/conv_acc.c
A src/conv_acc_neon.c
A src/conv_acc_neon_impl.h
5 files changed, 506 insertions(+), 0 deletions(-)



  git pull ssh://gerrit.osmocom.org:29418/libosmocore refs/changes/72/19372/1

diff --git a/configure.ac b/configure.ac
index f69c78d..2397b2f 100644
--- a/configure.ac
+++ b/configure.ac
@@ -378,6 +378,17 @@
AM_CONDITIONAL(HAVE_SSE4_1, false)
 fi

+AC_ARG_ENABLE(neon,
+   [AS_HELP_STRING(
+   [--enable-neon],
+   [Enable NEON support]
+   )],
+   [neon=$enableval], [neon="no"])
+AC_DEFINE(HAVE_NEON,,
+[Support ARM NEON instructions])
+AM_CONDITIONAL(HAVE_NEON, [test "x$neon" != "xno"])
+
+
 OSMO_AC_CODE_COVERAGE

 dnl Check if the compiler supports specified GCC's built-in function
diff --git a/src/Makefile.am b/src/Makefile.am
index 16119d9..be09784 100644
--- a/src/Makefile.am
+++ b/src/Makefile.am
@@ -48,6 +48,11 @@
 endif
 endif

+if HAVE_NEON
+libosmocore_la_SOURCES += conv_acc_neon.c
+# conv_acc_neon.lo : AM_CFLAGS += -mfpu=neon no, could as well be vfp with neon
+endif
+
 BUILT_SOURCES = crc8gen.c crc16gen.c crc32gen.c crc64gen.c
 EXTRA_DIST = conv_acc_sse_impl.h crcXXgen.c.tpl

diff --git a/src/conv_acc.c b/src/conv_acc.c
index c16e436..773d87d 100644
--- a/src/conv_acc.c
+++ b/src/conv_acc.c
@@ -85,6 +85,11 @@
 void osmo_conv_sse_avx_vdec_free(int16_t *ptr);
 #endif

+#ifdef HAVE_NEON
+int16_t *osmo_conv_neon_vdec_malloc(size_t n);
+void osmo_conv_neon_vdec_free(int16_t *ptr);
+#endif
+
 /* Forward Metric Units */
 void osmo_conv_gen_metrics_k5_n2(const int8_t *seq, const int16_t *out,
int16_t *sums, int16_t *paths, int norm);
@@ -129,6 +134,21 @@
int16_t *sums, int16_t *paths, int norm);
 #endif

+#if defined(HAVE_NEON)
+void osmo_conv_neon_metrics_k5_n2(const int8_t *seq, const int16_t *out,
+   int16_t *sums, int16_t *paths, int norm);
+void osmo_conv_neon_metrics_k5_n3(const int8_t *seq, const int16_t *out,
+   int16_t *sums, int16_t *paths, int norm);
+void osmo_conv_neon_metrics_k5_n4(const int8_t *seq, const int16_t *out,
+   int16_t *sums, int16_t *paths, int norm);
+void osmo_conv_neon_metrics_k7_n2(const int8_t *seq, const int16_t *out,
+   int16_t *sums, int16_t *paths, int norm);
+void osmo_conv_neon_metrics_k7_n3(const int8_t *seq, const int16_t *out,
+   int16_t *sums, int16_t *paths, int norm);
+void osmo_conv_neon_metrics_k7_n4(const int8_t *seq, const int16_t *out,
+   int16_t *sums, int16_t *paths, int norm);
+#endif
+
 /* Trellis State
  * state - Internal lshift register value
  * prev  - Register values of previous 0 and 1 states
@@ -528,6 +548,12 @@
if (dec->k == 5) {
switch (dec->n) {
case 2:
+/* rach len 14 is too short for neon */
+#ifdef HAVE_NEON
+   if(code->len < 100)
+   dec->metric_func = osmo_conv_gen_metrics_k5_n2;
+   else
+#endif
dec->metric_func = osmo_conv_metrics_k5_n2;
break;
case 3:
@@ -681,6 +707,8 @@
} else {
INIT_POINTERS(gen);
}
+#elif defined(HAVE_NEON)
+   INIT_POINTERS(neon);
 #else
INIT_POINTERS(gen);
 #endif
diff --git a/src/conv_acc_neon.c b/src/conv_acc_neon.c
new file mode 100644
index 000..d6ba115
--- /dev/null
+++ b/src/conv_acc_neon.c
@@ -0,0 +1,109 @@
+/*! \file conv_acc_neon.c
+ * Accelerated Viterbi decoder implementation
+ * for architectures with only NEON available. */
+/*
+ * Copyright (C) 2020 Eric Wild 
+ *
+ * All Rights Reserved
+ *
+ * SPDX-License-Identifier: GPL-2.0+
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU