Change in libosmocore[master]: libomsocoding: NEON viterbi acceleration
laforge has posted comments on this change. ( https://gerrit.osmocom.org/c/libosmocore/+/19372 ) Change subject: libomsocoding: NEON viterbi acceleration .. Patch Set 3: Code-Review+2 -- To view, visit https://gerrit.osmocom.org/c/libosmocore/+/19372 To unsubscribe, or for help writing mail filters, visit https://gerrit.osmocom.org/settings Gerrit-Project: libosmocore Gerrit-Branch: master Gerrit-Change-Id: I58ff2cb4ce3514f43390ff0a2121f81e6a4983b5 Gerrit-Change-Number: 19372 Gerrit-PatchSet: 3 Gerrit-Owner: Hoernchen Gerrit-Reviewer: Hoernchen Gerrit-Reviewer: Jenkins Builder Gerrit-Reviewer: fixeria Gerrit-Reviewer: laforge Gerrit-CC: pespin Gerrit-Comment-Date: Thu, 06 Aug 2020 16:47:37 + Gerrit-HasComments: No Gerrit-Has-Labels: Yes Gerrit-MessageType: comment
Change in libosmocore[master]: libomsocoding: NEON viterbi acceleration
laforge has submitted this change. ( https://gerrit.osmocom.org/c/libosmocore/+/19372 ) Change subject: libomsocoding: NEON viterbi acceleration .. libomsocoding: NEON viterbi acceleration configure flag required to enable this: --enable-neon Although autodetection according to __ARM_NEON would work because this is only defined if the fpu is neon neon-fp16 neon-vfpv3 neon-vfpv4 neon-fp-armv8 crypto-neon-fp-armv8 doing that would lead to a unknown performance impact, so it needs to be enabled manually. Speedup is about ~1.3-1.5 on a unspecified single core Cortex A9. This requires handling a special case for RACH with len 14 which is far too short for neon and would actually incur a performance penalty of 25%. Related: OS#4585 Change-Id: I58ff2cb4ce3514f43390ff0a2121f81e6a4983b5 --- M configure.ac M src/Makefile.am M src/conv_acc.c A src/conv_acc_neon.c A src/conv_acc_neon_impl.h 5 files changed, 508 insertions(+), 0 deletions(-) Approvals: laforge: Looks good to me, approved Jenkins Builder: Verified diff --git a/configure.ac b/configure.ac index f69c78d..2397b2f 100644 --- a/configure.ac +++ b/configure.ac @@ -378,6 +378,17 @@ AM_CONDITIONAL(HAVE_SSE4_1, false) fi +AC_ARG_ENABLE(neon, + [AS_HELP_STRING( + [--enable-neon], + [Enable NEON support] + )], + [neon=$enableval], [neon="no"]) +AC_DEFINE(HAVE_NEON,, +[Support ARM NEON instructions]) +AM_CONDITIONAL(HAVE_NEON, [test "x$neon" != "xno"]) + + OSMO_AC_CODE_COVERAGE dnl Check if the compiler supports specified GCC's built-in function diff --git a/src/Makefile.am b/src/Makefile.am index 16119d9..be09784 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -48,6 +48,11 @@ endif endif +if HAVE_NEON +libosmocore_la_SOURCES += conv_acc_neon.c +# conv_acc_neon.lo : AM_CFLAGS += -mfpu=neon no, could as well be vfp with neon +endif + BUILT_SOURCES = crc8gen.c crc16gen.c crc32gen.c crc64gen.c EXTRA_DIST = conv_acc_sse_impl.h crcXXgen.c.tpl diff --git a/src/conv_acc.c b/src/conv_acc.c index c16e436..0f6f7ca 100644 --- a/src/conv_acc.c +++ b/src/conv_acc.c @@ -85,6 +85,11 @@ void osmo_conv_sse_avx_vdec_free(int16_t *ptr); #endif +#ifdef HAVE_NEON +int16_t *osmo_conv_neon_vdec_malloc(size_t n); +void osmo_conv_neon_vdec_free(int16_t *ptr); +#endif + /* Forward Metric Units */ void osmo_conv_gen_metrics_k5_n2(const int8_t *seq, const int16_t *out, int16_t *sums, int16_t *paths, int norm); @@ -129,6 +134,21 @@ int16_t *sums, int16_t *paths, int norm); #endif +#if defined(HAVE_NEON) +void osmo_conv_neon_metrics_k5_n2(const int8_t *seq, const int16_t *out, + int16_t *sums, int16_t *paths, int norm); +void osmo_conv_neon_metrics_k5_n3(const int8_t *seq, const int16_t *out, + int16_t *sums, int16_t *paths, int norm); +void osmo_conv_neon_metrics_k5_n4(const int8_t *seq, const int16_t *out, + int16_t *sums, int16_t *paths, int norm); +void osmo_conv_neon_metrics_k7_n2(const int8_t *seq, const int16_t *out, + int16_t *sums, int16_t *paths, int norm); +void osmo_conv_neon_metrics_k7_n3(const int8_t *seq, const int16_t *out, + int16_t *sums, int16_t *paths, int norm); +void osmo_conv_neon_metrics_k7_n4(const int8_t *seq, const int16_t *out, + int16_t *sums, int16_t *paths, int norm); +#endif + /* Trellis State * state - Internal lshift register value * prev - Register values of previous 0 and 1 states @@ -528,6 +548,12 @@ if (dec->k == 5) { switch (dec->n) { case 2: +/* rach len 14 is too short for neon */ +#ifdef HAVE_NEON + if (code->len < 100) + dec->metric_func = osmo_conv_gen_metrics_k5_n2; + else +#endif dec->metric_func = osmo_conv_metrics_k5_n2; break; case 3: @@ -681,6 +707,8 @@ } else { INIT_POINTERS(gen); } +#elif defined(HAVE_NEON) + INIT_POINTERS(neon); #else INIT_POINTERS(gen); #endif diff --git a/src/conv_acc_neon.c b/src/conv_acc_neon.c new file mode 100644 index 000..7244946 --- /dev/null +++ b/src/conv_acc_neon.c @@ -0,0 +1,110 @@ +/*! \file conv_acc_neon.c + * Accelerated Viterbi decoder implementation + * for architectures with only NEON available. */ +/* + * (C) 2020 by sysmocom - s.f.m.c. GmbH + * Author: Eric Wild + * + * All Rights Reserved + * + * SPDX-License-Identifier: GPL-2.0+ + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A
Change in libosmocore[master]: libomsocoding: NEON viterbi acceleration
Hoernchen has posted comments on this change. ( https://gerrit.osmocom.org/c/libosmocore/+/19372 ) Change subject: libomsocoding: NEON viterbi acceleration .. Patch Set 3: (1 comment) https://gerrit.osmocom.org/c/libosmocore/+/19372/3/src/conv_acc.c File src/conv_acc.c: https://gerrit.osmocom.org/c/libosmocore/+/19372/3/src/conv_acc.c@557 PS3, Line 557: dec->metric_func = osmo_conv_metrics_k5_n2; > wrong indentation not addressed. it is not wrong, it is the reasonable approach here because your proposed "fix" suddenly introduces a new scope around the assignment in case NEON is not defined which clearly is not a preferrable choice just to make something "look nice" because it actually impacts the code itself and not merely cosmetic things -- To view, visit https://gerrit.osmocom.org/c/libosmocore/+/19372 To unsubscribe, or for help writing mail filters, visit https://gerrit.osmocom.org/settings Gerrit-Project: libosmocore Gerrit-Branch: master Gerrit-Change-Id: I58ff2cb4ce3514f43390ff0a2121f81e6a4983b5 Gerrit-Change-Number: 19372 Gerrit-PatchSet: 3 Gerrit-Owner: Hoernchen Gerrit-Reviewer: Hoernchen Gerrit-Reviewer: Jenkins Builder Gerrit-Reviewer: fixeria Gerrit-Reviewer: laforge Gerrit-CC: pespin Gerrit-Comment-Date: Thu, 23 Jul 2020 14:35:29 + Gerrit-HasComments: Yes Gerrit-Has-Labels: No Comment-In-Reply-To: pespin Gerrit-MessageType: comment
Change in libosmocore[master]: libomsocoding: NEON viterbi acceleration
Hoernchen has posted comments on this change. ( https://gerrit.osmocom.org/c/libosmocore/+/19372 ) Change subject: libomsocoding: NEON viterbi acceleration .. Patch Set 3: (1 comment) https://gerrit.osmocom.org/c/libosmocore/+/19372/3//COMMIT_MSG Commit Message: https://gerrit.osmocom.org/c/libosmocore/+/19372/3//COMMIT_MSG@14 PS3, Line 14: performance impact, so it needs to be enabled manually. > Please put in here the cortex-a8 situation like Harald suggested The fpu feature situation is far more severe. -- To view, visit https://gerrit.osmocom.org/c/libosmocore/+/19372 To unsubscribe, or for help writing mail filters, visit https://gerrit.osmocom.org/settings Gerrit-Project: libosmocore Gerrit-Branch: master Gerrit-Change-Id: I58ff2cb4ce3514f43390ff0a2121f81e6a4983b5 Gerrit-Change-Number: 19372 Gerrit-PatchSet: 3 Gerrit-Owner: Hoernchen Gerrit-Reviewer: Hoernchen Gerrit-Reviewer: Jenkins Builder Gerrit-Reviewer: fixeria Gerrit-Reviewer: laforge Gerrit-CC: pespin Gerrit-Comment-Date: Thu, 23 Jul 2020 14:32:05 + Gerrit-HasComments: Yes Gerrit-Has-Labels: No Comment-In-Reply-To: pespin Gerrit-MessageType: comment
Change in libosmocore[master]: libomsocoding: NEON viterbi acceleration
pespin has posted comments on this change. ( https://gerrit.osmocom.org/c/libosmocore/+/19372 ) Change subject: libomsocoding: NEON viterbi acceleration .. Patch Set 3: (3 comments) https://gerrit.osmocom.org/c/libosmocore/+/19372/3//COMMIT_MSG Commit Message: https://gerrit.osmocom.org/c/libosmocore/+/19372/3//COMMIT_MSG@14 PS3, Line 14: performance impact, so it needs to be enabled manually. Please put in here the cortex-a8 situation like Harald suggested https://gerrit.osmocom.org/c/libosmocore/+/19372/3/src/conv_acc.c File src/conv_acc.c: https://gerrit.osmocom.org/c/libosmocore/+/19372/3/src/conv_acc.c@557 PS3, Line 557: dec->metric_func = osmo_conv_metrics_k5_n2; wrong indentation not addressed. https://gerrit.osmocom.org/c/libosmocore/+/19372/1/src/conv_acc_neon_impl.h File src/conv_acc_neon_impl.h: https://gerrit.osmocom.org/c/libosmocore/+/19372/1/src/conv_acc_neon_impl.h@26 PS1, Line 26: /* Some distributions (notably Alpine Linux) for some strange reason > This is a copy from the sse files and I see no reason to question this or > waste time tracking it dow […] Ack -- To view, visit https://gerrit.osmocom.org/c/libosmocore/+/19372 To unsubscribe, or for help writing mail filters, visit https://gerrit.osmocom.org/settings Gerrit-Project: libosmocore Gerrit-Branch: master Gerrit-Change-Id: I58ff2cb4ce3514f43390ff0a2121f81e6a4983b5 Gerrit-Change-Number: 19372 Gerrit-PatchSet: 3 Gerrit-Owner: Hoernchen Gerrit-Reviewer: Hoernchen Gerrit-Reviewer: Jenkins Builder Gerrit-Reviewer: fixeria Gerrit-Reviewer: laforge Gerrit-CC: pespin Gerrit-Comment-Date: Thu, 23 Jul 2020 14:26:47 + Gerrit-HasComments: Yes Gerrit-Has-Labels: No Comment-In-Reply-To: pespin Comment-In-Reply-To: Hoernchen Gerrit-MessageType: comment
Change in libosmocore[master]: libomsocoding: NEON viterbi acceleration
Hello fixeria, laforge, Jenkins Builder, I'd like you to reexamine a change. Please visit https://gerrit.osmocom.org/c/libosmocore/+/19372 to look at the new patch set (#3). Change subject: libomsocoding: NEON viterbi acceleration .. libomsocoding: NEON viterbi acceleration configure flag required to enable this: --enable-neon Although autodetection according to __ARM_NEON would work because this is only defined if the fpu is neon neon-fp16 neon-vfpv3 neon-vfpv4 neon-fp-armv8 crypto-neon-fp-armv8 doing that would lead to a unknown performance impact, so it needs to be enabled manually. Speedup is about ~1.3-1.5 on a unspecified single core Cortex A9. This requires handling a special case for RACH with len 14 which is far too short for neon and would actually incur a performance penalty of 25%. Related: OS#4585 Change-Id: I58ff2cb4ce3514f43390ff0a2121f81e6a4983b5 --- M configure.ac M src/Makefile.am M src/conv_acc.c A src/conv_acc_neon.c A src/conv_acc_neon_impl.h 5 files changed, 508 insertions(+), 0 deletions(-) git pull ssh://gerrit.osmocom.org:29418/libosmocore refs/changes/72/19372/3 -- To view, visit https://gerrit.osmocom.org/c/libosmocore/+/19372 To unsubscribe, or for help writing mail filters, visit https://gerrit.osmocom.org/settings Gerrit-Project: libosmocore Gerrit-Branch: master Gerrit-Change-Id: I58ff2cb4ce3514f43390ff0a2121f81e6a4983b5 Gerrit-Change-Number: 19372 Gerrit-PatchSet: 3 Gerrit-Owner: Hoernchen Gerrit-Reviewer: Hoernchen Gerrit-Reviewer: Jenkins Builder Gerrit-Reviewer: fixeria Gerrit-Reviewer: laforge Gerrit-CC: pespin Gerrit-MessageType: newpatchset
Change in libosmocore[master]: libomsocoding: NEON viterbi acceleration
Hello fixeria, laforge, Jenkins Builder, I'd like you to reexamine a change. Please visit https://gerrit.osmocom.org/c/libosmocore/+/19372 to look at the new patch set (#2). Change subject: libomsocoding: NEON viterbi acceleration .. libomsocoding: NEON viterbi acceleration configure flag required to enable this: --enable-neon Although autodetection according to __ARM_NEON would work because this is only defined if the fpu is neon neon-fp16 neon-vfpv3 neon-vfpv4 neon-fp-armv8 crypto-neon-fp-armv8 doing that would lead to a unknown performance impact, so it needs to be enabled manually. Speedup is about ~1.3-1.5 on a unspecified single core Cortex A9. This requires handling a special case for RACH with len 14 which is far too short for neon and would actually incur a performance penalty of 25%. Change-Id: I58ff2cb4ce3514f43390ff0a2121f81e6a4983b5 --- M configure.ac M src/Makefile.am M src/conv_acc.c A src/conv_acc_neon.c A src/conv_acc_neon_impl.h 5 files changed, 508 insertions(+), 0 deletions(-) git pull ssh://gerrit.osmocom.org:29418/libosmocore refs/changes/72/19372/2 -- To view, visit https://gerrit.osmocom.org/c/libosmocore/+/19372 To unsubscribe, or for help writing mail filters, visit https://gerrit.osmocom.org/settings Gerrit-Project: libosmocore Gerrit-Branch: master Gerrit-Change-Id: I58ff2cb4ce3514f43390ff0a2121f81e6a4983b5 Gerrit-Change-Number: 19372 Gerrit-PatchSet: 2 Gerrit-Owner: Hoernchen Gerrit-Reviewer: Hoernchen Gerrit-Reviewer: Jenkins Builder Gerrit-Reviewer: fixeria Gerrit-Reviewer: laforge Gerrit-CC: pespin Gerrit-MessageType: newpatchset
Change in libosmocore[master]: libomsocoding: NEON viterbi acceleration
Hoernchen has posted comments on this change. ( https://gerrit.osmocom.org/c/libosmocore/+/19372 ) Change subject: libomsocoding: NEON viterbi acceleration .. Patch Set 1: (1 comment) https://gerrit.osmocom.org/c/libosmocore/+/19372/1//COMMIT_MSG Commit Message: https://gerrit.osmocom.org/c/libosmocore/+/19372/1//COMMIT_MSG@11 PS1, Line 11: Although autodetection according to __ARM_NEON would work because this : is only defined if the fpu is neon neon-fp16 neon-vfpv3 eon-vfpv4 : neon-fp-armv8 crypto-neon-fp-armv8 doing that would lead to a unknown : performance impact, so it needs to be enabled manually. > thanks for explaining the Cortex-A8 situation. […] And there is the case where you have to compile with neon to get neon at runtime for a specific fpu model, then detect the neon cpu feature at run time, but the target system might actually be compiled for a different fpu layout/"no fpu" soft float model that is not related to the cpu neon feature. -- To view, visit https://gerrit.osmocom.org/c/libosmocore/+/19372 To unsubscribe, or for help writing mail filters, visit https://gerrit.osmocom.org/settings Gerrit-Project: libosmocore Gerrit-Branch: master Gerrit-Change-Id: I58ff2cb4ce3514f43390ff0a2121f81e6a4983b5 Gerrit-Change-Number: 19372 Gerrit-PatchSet: 1 Gerrit-Owner: Hoernchen Gerrit-Reviewer: Hoernchen Gerrit-Reviewer: Jenkins Builder Gerrit-Reviewer: fixeria Gerrit-Reviewer: laforge Gerrit-CC: pespin Gerrit-Comment-Date: Thu, 23 Jul 2020 13:07:26 + Gerrit-HasComments: Yes Gerrit-Has-Labels: No Comment-In-Reply-To: laforge Comment-In-Reply-To: Hoernchen Gerrit-MessageType: comment
Change in libosmocore[master]: libomsocoding: NEON viterbi acceleration
laforge has posted comments on this change. ( https://gerrit.osmocom.org/c/libosmocore/+/19372 ) Change subject: libomsocoding: NEON viterbi acceleration .. Patch Set 1: (1 comment) https://gerrit.osmocom.org/c/libosmocore/+/19372/1//COMMIT_MSG Commit Message: https://gerrit.osmocom.org/c/libosmocore/+/19372/1//COMMIT_MSG@11 PS1, Line 11: Although autodetection according to __ARM_NEON would work because this : is only defined if the fpu is neon neon-fp16 neon-vfpv3 eon-vfpv4 : neon-fp-armv8 crypto-neon-fp-armv8 doing that would lead to a unknown : performance impact, so it needs to be enabled manually. > SS(S)E/AVX is a cpu feature set that differs depending on bits provided by > the cpu in a reg (and the […] thanks for explaining the Cortex-A8 situation. It probably deserves to be mentioned here as a concrete example why we don't enable it by default. After that explanation, I'm happy with your patch (minus the copyright notice). -- To view, visit https://gerrit.osmocom.org/c/libosmocore/+/19372 To unsubscribe, or for help writing mail filters, visit https://gerrit.osmocom.org/settings Gerrit-Project: libosmocore Gerrit-Branch: master Gerrit-Change-Id: I58ff2cb4ce3514f43390ff0a2121f81e6a4983b5 Gerrit-Change-Number: 19372 Gerrit-PatchSet: 1 Gerrit-Owner: Hoernchen Gerrit-Reviewer: Hoernchen Gerrit-Reviewer: Jenkins Builder Gerrit-Reviewer: fixeria Gerrit-Reviewer: laforge Gerrit-CC: pespin Gerrit-Comment-Date: Thu, 23 Jul 2020 12:52:24 + Gerrit-HasComments: Yes Gerrit-Has-Labels: No Comment-In-Reply-To: laforge Comment-In-Reply-To: Hoernchen Gerrit-MessageType: comment
Change in libosmocore[master]: libomsocoding: NEON viterbi acceleration
Hoernchen has posted comments on this change. ( https://gerrit.osmocom.org/c/libosmocore/+/19372 ) Change subject: libomsocoding: NEON viterbi acceleration .. Patch Set 1: (5 comments) https://gerrit.osmocom.org/c/libosmocore/+/19372/1//COMMIT_MSG Commit Message: https://gerrit.osmocom.org/c/libosmocore/+/19372/1//COMMIT_MSG@7 PS1, Line 7: libomsocoding > Actually, it's not really libosmocoding. […] I know, but the user of this code is libosmocoding, so this speds up libosmocoding, and therefore provides a more precise description of what now has neon support. https://gerrit.osmocom.org/c/libosmocore/+/19372/1//COMMIT_MSG@14 PS1, Line 14: performance impact, so it needs to be enabled manually. > I don't get what do you mean here. […] Just like the special rach case neon can be bad, i.e. on a cortex a8 with in-order dual issue pipeline neon performance is abysmal. https://gerrit.osmocom.org/c/libosmocore/+/19372/1//COMMIT_MSG@11 PS1, Line 11: Although autodetection according to __ARM_NEON would work because this : is only defined if the fpu is neon neon-fp16 neon-vfpv3 eon-vfpv4 : neon-fp-armv8 crypto-neon-fp-armv8 doing that would lead to a unknown : performance impact, so it needs to be enabled manually. > I don't like this kind of approach. […] SS(S)E/AVX is a cpu feature set that differs depending on bits provided by the cpu in a reg (and the knowledge that x64 means at least SSE2), NEON is a compile time target specific fpu and fp api configuration that is eternally set in stone at compile time. Detection would therefore happen at compile time. Mixing neon and the fpu is bad, as is neon on a dual issue in-order cortex a8, so there are valid reason why it should not just be enabled. That being said it's the users or devices fault, I'm fine with just turning it on. https://gerrit.osmocom.org/c/libosmocore/+/19372/1/src/Makefile.am File src/Makefile.am: https://gerrit.osmocom.org/c/libosmocore/+/19372/1/src/Makefile.am@53 PS1, Line 53: no, could as well be vfp with neon > Is it a comment or a part of AM_CFLAGS? Just a reminder why adding explicit flags for neon instead or reyling upon the cflags provided by the build environment is a bad idea. https://gerrit.osmocom.org/c/libosmocore/+/19372/1/src/conv_acc_neon_impl.h File src/conv_acc_neon_impl.h: https://gerrit.osmocom.org/c/libosmocore/+/19372/1/src/conv_acc_neon_impl.h@26 PS1, Line 26: /* Some distributions (notably Alpine Linux) for some strange reason > In my linux (ArchLinux) __always_inline is defined in > /usr/include/linux/stddef.h. […] This is a copy from the sse files and I see no reason to question this or waste time tracking it down.. -- To view, visit https://gerrit.osmocom.org/c/libosmocore/+/19372 To unsubscribe, or for help writing mail filters, visit https://gerrit.osmocom.org/settings Gerrit-Project: libosmocore Gerrit-Branch: master Gerrit-Change-Id: I58ff2cb4ce3514f43390ff0a2121f81e6a4983b5 Gerrit-Change-Number: 19372 Gerrit-PatchSet: 1 Gerrit-Owner: Hoernchen Gerrit-Reviewer: Hoernchen Gerrit-Reviewer: Jenkins Builder Gerrit-Reviewer: fixeria Gerrit-Reviewer: laforge Gerrit-CC: pespin Gerrit-Comment-Date: Thu, 23 Jul 2020 10:43:40 + Gerrit-HasComments: Yes Gerrit-Has-Labels: No Comment-In-Reply-To: fixeria Comment-In-Reply-To: pespin Comment-In-Reply-To: laforge Gerrit-MessageType: comment
Change in libosmocore[master]: libomsocoding: NEON viterbi acceleration
pespin has posted comments on this change. ( https://gerrit.osmocom.org/c/libosmocore/+/19372 ) Change subject: libomsocoding: NEON viterbi acceleration .. Patch Set 1: (6 comments) https://gerrit.osmocom.org/c/libosmocore/+/19372/1//COMMIT_MSG Commit Message: https://gerrit.osmocom.org/c/libosmocore/+/19372/1//COMMIT_MSG@9 PS1, Line 9: configure flag required to enable this: --enable-neon double space "required to" https://gerrit.osmocom.org/c/libosmocore/+/19372/1//COMMIT_MSG@12 PS1, Line 12: is only defined if the fpu is neon neon-fp16 neon-vfpv3 eon-vfpv4 type: missing n in nenon. https://gerrit.osmocom.org/c/libosmocore/+/19372/1//COMMIT_MSG@14 PS1, Line 14: performance impact, so it needs to be enabled manually. I don't get what do you mean here. What do you mean with "unknown performance impact" here, why? https://gerrit.osmocom.org/c/libosmocore/+/19372/1/configure.ac File configure.ac: https://gerrit.osmocom.org/c/libosmocore/+/19372/1/configure.ac@388 PS1, Line 388: [Support ARM NEON instructions]) I would say this flag is more "Enable neon instruction specific optimizations", because you can still disable it regardless of your target supporting neon or not. https://gerrit.osmocom.org/c/libosmocore/+/19372/1/src/conv_acc.c File src/conv_acc.c: https://gerrit.osmocom.org/c/libosmocore/+/19372/1/src/conv_acc.c@557 PS1, Line 557: dec->metric_func = osmo_conv_metrics_k5_n2; wrong indentation. Probably rather: #ifdef HAVE_NEON if (code->len < 100) { dec->metric_func = osmo_conv_gen_metrics_k5_n2; } else #endif { dec->metric_func = osmo_conv_metrics_k5_n2; } https://gerrit.osmocom.org/c/libosmocore/+/19372/1/src/conv_acc_neon_impl.h File src/conv_acc_neon_impl.h: https://gerrit.osmocom.org/c/libosmocore/+/19372/1/src/conv_acc_neon_impl.h@26 PS1, Line 26: /* Some distributions (notably Alpine Linux) for some strange reason In my linux (ArchLinux) __always_inline is defined in /usr/include/linux/stddef.h. Perhaps you are missing to include a header defining it? -- To view, visit https://gerrit.osmocom.org/c/libosmocore/+/19372 To unsubscribe, or for help writing mail filters, visit https://gerrit.osmocom.org/settings Gerrit-Project: libosmocore Gerrit-Branch: master Gerrit-Change-Id: I58ff2cb4ce3514f43390ff0a2121f81e6a4983b5 Gerrit-Change-Number: 19372 Gerrit-PatchSet: 1 Gerrit-Owner: Hoernchen Gerrit-Reviewer: Jenkins Builder Gerrit-Reviewer: fixeria Gerrit-Reviewer: laforge Gerrit-CC: pespin Gerrit-Comment-Date: Thu, 23 Jul 2020 08:39:39 + Gerrit-HasComments: Yes Gerrit-Has-Labels: No Gerrit-MessageType: comment
Change in libosmocore[master]: libomsocoding: NEON viterbi acceleration
laforge has posted comments on this change. ( https://gerrit.osmocom.org/c/libosmocore/+/19372 ) Change subject: libomsocoding: NEON viterbi acceleration .. Patch Set 1: Code-Review-1 (3 comments) -1 just for the legality of the copyright notice. the other comment is not worth a -1. https://gerrit.osmocom.org/c/libosmocore/+/19372/1//COMMIT_MSG Commit Message: https://gerrit.osmocom.org/c/libosmocore/+/19372/1//COMMIT_MSG@11 PS1, Line 11: Although autodetection according to __ARM_NEON would work because this : is only defined if the fpu is neon neon-fp16 neon-vfpv3 eon-vfpv4 : neon-fp-armv8 crypto-neon-fp-armv8 doing that would lead to a unknown : performance impact, so it needs to be enabled manually. I don't like this kind of approach. We have auto-detection for x86 SIMD instructions, and we should try to align with that for NEON. The detection would only have to be done once at startup, right? Projects like ffmpeg are doing run-time detection of CPU features for a very long time. So without even knowing how bad the impact would be, I'm not sure we should disable it. The only question would be whether or not we should enable it by default in all our builds anyway. I'm not sure who is running osmo-bts-trx on an ARM without NEON support. We have plenty of osmo-bts-sysmo on old cores, probably also -octphy. But we've never heard of an osmo-bts-trx user who doesn't have a modern, NEON enabled core, AFAICT. https://gerrit.osmocom.org/c/libosmocore/+/19372/1/src/conv_acc_neon.c File src/conv_acc_neon.c: https://gerrit.osmocom.org/c/libosmocore/+/19372/1/src/conv_acc_neon.c@5 PS1, Line 5: Copyright (C) 2020 Eric Wild erm, coypright by sysmocom, Author is you. At least as long as it was done during work hours :P https://gerrit.osmocom.org/c/libosmocore/+/19372/1/src/conv_acc_neon_impl.h File src/conv_acc_neon_impl.h: https://gerrit.osmocom.org/c/libosmocore/+/19372/1/src/conv_acc_neon_impl.h@5 PS1, Line 5: same here of course -- To view, visit https://gerrit.osmocom.org/c/libosmocore/+/19372 To unsubscribe, or for help writing mail filters, visit https://gerrit.osmocom.org/settings Gerrit-Project: libosmocore Gerrit-Branch: master Gerrit-Change-Id: I58ff2cb4ce3514f43390ff0a2121f81e6a4983b5 Gerrit-Change-Number: 19372 Gerrit-PatchSet: 1 Gerrit-Owner: Hoernchen Gerrit-Reviewer: Jenkins Builder Gerrit-Reviewer: fixeria Gerrit-Reviewer: laforge Gerrit-Comment-Date: Thu, 23 Jul 2020 08:26:09 + Gerrit-HasComments: Yes Gerrit-Has-Labels: Yes Gerrit-MessageType: comment
Change in libosmocore[master]: libomsocoding: NEON viterbi acceleration
fixeria has posted comments on this change. ( https://gerrit.osmocom.org/c/libosmocore/+/19372 ) Change subject: libomsocoding: NEON viterbi acceleration .. Patch Set 1: (1 comment) https://gerrit.osmocom.org/c/libosmocore/+/19372/1//COMMIT_MSG Commit Message: https://gerrit.osmocom.org/c/libosmocore/+/19372/1//COMMIT_MSG@7 PS1, Line 7: libomsocoding Actually, it's not really libosmocoding. Due to historical reasons, the Viterbi implementation is still in libosmocore. -- To view, visit https://gerrit.osmocom.org/c/libosmocore/+/19372 To unsubscribe, or for help writing mail filters, visit https://gerrit.osmocom.org/settings Gerrit-Project: libosmocore Gerrit-Branch: master Gerrit-Change-Id: I58ff2cb4ce3514f43390ff0a2121f81e6a4983b5 Gerrit-Change-Number: 19372 Gerrit-PatchSet: 1 Gerrit-Owner: Hoernchen Gerrit-Reviewer: Jenkins Builder Gerrit-Reviewer: fixeria Gerrit-Comment-Date: Thu, 23 Jul 2020 07:34:56 + Gerrit-HasComments: Yes Gerrit-Has-Labels: No Gerrit-MessageType: comment
Change in libosmocore[master]: libomsocoding: NEON viterbi acceleration
fixeria has posted comments on this change. ( https://gerrit.osmocom.org/c/libosmocore/+/19372 ) Change subject: libomsocoding: NEON viterbi acceleration .. Patch Set 1: Code-Review+1 (2 comments) https://gerrit.osmocom.org/c/libosmocore/+/19372/1/src/Makefile.am File src/Makefile.am: https://gerrit.osmocom.org/c/libosmocore/+/19372/1/src/Makefile.am@53 PS1, Line 53: no, could as well be vfp with neon Is it a comment or a part of AM_CFLAGS? https://gerrit.osmocom.org/c/libosmocore/+/19372/1/src/conv_acc.c File src/conv_acc.c: https://gerrit.osmocom.org/c/libosmocore/+/19372/1/src/conv_acc.c@553 PS1, Line 553: if(code->len < 100) If is not a function, missing space. -- To view, visit https://gerrit.osmocom.org/c/libosmocore/+/19372 To unsubscribe, or for help writing mail filters, visit https://gerrit.osmocom.org/settings Gerrit-Project: libosmocore Gerrit-Branch: master Gerrit-Change-Id: I58ff2cb4ce3514f43390ff0a2121f81e6a4983b5 Gerrit-Change-Number: 19372 Gerrit-PatchSet: 1 Gerrit-Owner: Hoernchen Gerrit-Reviewer: Jenkins Builder Gerrit-Reviewer: fixeria Gerrit-Comment-Date: Thu, 23 Jul 2020 07:31:56 + Gerrit-HasComments: Yes Gerrit-Has-Labels: Yes Gerrit-MessageType: comment
Change in libosmocore[master]: libomsocoding: NEON viterbi acceleration
Hoernchen has uploaded this change for review. ( https://gerrit.osmocom.org/c/libosmocore/+/19372 ) Change subject: libomsocoding: NEON viterbi acceleration .. libomsocoding: NEON viterbi acceleration configure flag required to enable this: --enable-neon Although autodetection according to __ARM_NEON would work because this is only defined if the fpu is neon neon-fp16 neon-vfpv3 eon-vfpv4 neon-fp-armv8 crypto-neon-fp-armv8 doing that would lead to a unknown performance impact, so it needs to be enabled manually. Speedup is about ~1.3-1.5 on a unspecified single core Cortex A9. This requires handling a special case for RACH with len 14 which is far too short for neon and would actually incur a performance penalty of 25%. Change-Id: I58ff2cb4ce3514f43390ff0a2121f81e6a4983b5 --- M configure.ac M src/Makefile.am M src/conv_acc.c A src/conv_acc_neon.c A src/conv_acc_neon_impl.h 5 files changed, 506 insertions(+), 0 deletions(-) git pull ssh://gerrit.osmocom.org:29418/libosmocore refs/changes/72/19372/1 diff --git a/configure.ac b/configure.ac index f69c78d..2397b2f 100644 --- a/configure.ac +++ b/configure.ac @@ -378,6 +378,17 @@ AM_CONDITIONAL(HAVE_SSE4_1, false) fi +AC_ARG_ENABLE(neon, + [AS_HELP_STRING( + [--enable-neon], + [Enable NEON support] + )], + [neon=$enableval], [neon="no"]) +AC_DEFINE(HAVE_NEON,, +[Support ARM NEON instructions]) +AM_CONDITIONAL(HAVE_NEON, [test "x$neon" != "xno"]) + + OSMO_AC_CODE_COVERAGE dnl Check if the compiler supports specified GCC's built-in function diff --git a/src/Makefile.am b/src/Makefile.am index 16119d9..be09784 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -48,6 +48,11 @@ endif endif +if HAVE_NEON +libosmocore_la_SOURCES += conv_acc_neon.c +# conv_acc_neon.lo : AM_CFLAGS += -mfpu=neon no, could as well be vfp with neon +endif + BUILT_SOURCES = crc8gen.c crc16gen.c crc32gen.c crc64gen.c EXTRA_DIST = conv_acc_sse_impl.h crcXXgen.c.tpl diff --git a/src/conv_acc.c b/src/conv_acc.c index c16e436..773d87d 100644 --- a/src/conv_acc.c +++ b/src/conv_acc.c @@ -85,6 +85,11 @@ void osmo_conv_sse_avx_vdec_free(int16_t *ptr); #endif +#ifdef HAVE_NEON +int16_t *osmo_conv_neon_vdec_malloc(size_t n); +void osmo_conv_neon_vdec_free(int16_t *ptr); +#endif + /* Forward Metric Units */ void osmo_conv_gen_metrics_k5_n2(const int8_t *seq, const int16_t *out, int16_t *sums, int16_t *paths, int norm); @@ -129,6 +134,21 @@ int16_t *sums, int16_t *paths, int norm); #endif +#if defined(HAVE_NEON) +void osmo_conv_neon_metrics_k5_n2(const int8_t *seq, const int16_t *out, + int16_t *sums, int16_t *paths, int norm); +void osmo_conv_neon_metrics_k5_n3(const int8_t *seq, const int16_t *out, + int16_t *sums, int16_t *paths, int norm); +void osmo_conv_neon_metrics_k5_n4(const int8_t *seq, const int16_t *out, + int16_t *sums, int16_t *paths, int norm); +void osmo_conv_neon_metrics_k7_n2(const int8_t *seq, const int16_t *out, + int16_t *sums, int16_t *paths, int norm); +void osmo_conv_neon_metrics_k7_n3(const int8_t *seq, const int16_t *out, + int16_t *sums, int16_t *paths, int norm); +void osmo_conv_neon_metrics_k7_n4(const int8_t *seq, const int16_t *out, + int16_t *sums, int16_t *paths, int norm); +#endif + /* Trellis State * state - Internal lshift register value * prev - Register values of previous 0 and 1 states @@ -528,6 +548,12 @@ if (dec->k == 5) { switch (dec->n) { case 2: +/* rach len 14 is too short for neon */ +#ifdef HAVE_NEON + if(code->len < 100) + dec->metric_func = osmo_conv_gen_metrics_k5_n2; + else +#endif dec->metric_func = osmo_conv_metrics_k5_n2; break; case 3: @@ -681,6 +707,8 @@ } else { INIT_POINTERS(gen); } +#elif defined(HAVE_NEON) + INIT_POINTERS(neon); #else INIT_POINTERS(gen); #endif diff --git a/src/conv_acc_neon.c b/src/conv_acc_neon.c new file mode 100644 index 000..d6ba115 --- /dev/null +++ b/src/conv_acc_neon.c @@ -0,0 +1,109 @@ +/*! \file conv_acc_neon.c + * Accelerated Viterbi decoder implementation + * for architectures with only NEON available. */ +/* + * Copyright (C) 2020 Eric Wild + * + * All Rights Reserved + * + * SPDX-License-Identifier: GPL-2.0+ + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU