Hi,

On Apr 26 20:46:51, b...@comstyle.com wrote:
> Implement SSE2 lrint() and lrintf() on amd64.

I don't think this is worth the added complexity:
seven more patches to have a different lrint()?
Does it make the resampling noticeably better/faster?

Also, the patch changes the CONFIGURE_STYLE from gnu to autoreconf
and hardwires the autoconf and automake version (without explicitly
depending on them) - presumably because configure.ac is patched
so ./configure must be recreated.

This seems to basically replicate the SSE2 GitHub commit(s) in the port.
I would wait for a release that will already contain this.

        Jan




> 
> Index: Makefile
> ===================================================================
> RCS file: /cvs/ports/audio/libsamplerate/Makefile,v
> retrieving revision 1.27
> diff -u -p -u -p -r1.27 Makefile
> --- Makefile  5 Sep 2023 16:13:38 -0000       1.27
> +++ Makefile  27 Apr 2024 00:26:05 -0000
> @@ -2,7 +2,7 @@ COMMENT=      audio sample rate conversion li
>  
>  VER=         0.2.2
>  DISTNAME=    libsamplerate-${VER}
> -REVISION=    0
> +REVISION=    1
>  CATEGORIES=  audio
>  EXTRACT_SUFX=        .tar.xz
>  
> @@ -18,7 +18,9 @@ SITES=      https://github.com/libsndfile/lib
>  
>  WANTLIB=     m
>  
> -CONFIGURE_STYLE=gnu
> +AUTOCONF_VERSION=    2.71
> +AUTOMAKE_VERSION=    1.16
> +CONFIGURE_STYLE=autoreconf
>  CONFIGURE_ARGS=      --disable-cpu-clip \
>               --disable-fftw \
>               --disable-sndfile
> Index: patches/patch-configure_ac
> ===================================================================
> RCS file: patches/patch-configure_ac
> diff -N patches/patch-configure_ac
> --- /dev/null 1 Jan 1970 00:00:00 -0000
> +++ patches/patch-configure_ac        27 Apr 2024 00:26:05 -0000
> @@ -0,0 +1,43 @@
> +- Implement SSE2 lrint() and lrintf()
> +  7a81766b14fa03e97822cf1e0b1651648df13116
> +- use sse2 intrinsics for lrint/lrintf only on windows x64
> +  c01e2405612ad3561bf93e8e6dddb9ba0dffe4d9
> +- sse2 lrint/lrintf updates
> +  c164eaa25ffdeedc7d25e731172cc45a25f483d4
> +
> +Index: configure.ac
> +--- configure.ac.orig
> ++++ configure.ac
> +@@ -89,7 +89,7 @@ m4_define([abi_version_patch], [lt_revision])
> + 
> + dnl 
> ====================================================================================
> + 
> +-AC_CHECK_HEADERS([stdbool.h stdint.h sys/times.h unistd.h])
> ++AC_CHECK_HEADERS([stdbool.h stdint.h sys/times.h unistd.h immintrin.h])
> + 
> + dnl 
> ====================================================================================
> + dnl  Couple of initializations here. Fill in real values later.
> +@@ -105,6 +105,9 @@ AC_ARG_ENABLE([werror],
> + AC_ARG_ENABLE([cpu-clip],
> +     [AS_HELP_STRING([--disable-cpu-clip], [disable tricky cpu specific 
> clipper])])
> + 
> ++AC_ARG_ENABLE([sse2-lrint],
> ++    [AS_HELP_STRING([--enable-sse2-lrint], [implement lrintf using SSE2 on 
> x86 CPUs if possible])])
> ++
> + AC_ARG_ENABLE([sndfile],
> +     [AS_HELP_STRING([--disable-sndfile], [disable support for sndfile 
> (default=autodetect)])], [], [enable_sndfile=auto])
> + 
> +@@ -178,6 +181,13 @@ AS_IF([test "x$enable_cpu_clip" != "xno"], [
> + 
> + AC_DEFINE_UNQUOTED([CPU_CLIPS_POSITIVE], [${ac_cv_c_clip_positive}], [Host 
> processor clips on positive float to int conversion.])
> + AC_DEFINE_UNQUOTED([CPU_CLIPS_NEGATIVE], [${ac_cv_c_clip_negative}], [Host 
> processor clips on negative float to int conversion.])
> ++
> ++dnl 
> ====================================================================================
> ++dnl  Determine if the user enabled lrint implementations using SSE2.
> ++
> ++AS_IF([test "x$enable_sse2_lrint" = "xyes"], [
> ++            CFLAGS="$CFLAGS -DENABLE_SSE2_LRINT"
> ++    ])
> + 
> + dnl 
> ====================================================================================
> + dnl  Check for libsndfile which is required for the test and example 
> programs.
> Index: patches/patch-examples_audio_out_c
> ===================================================================
> RCS file: patches/patch-examples_audio_out_c
> diff -N patches/patch-examples_audio_out_c
> --- /dev/null 1 Jan 1970 00:00:00 -0000
> +++ patches/patch-examples_audio_out_c        27 Apr 2024 00:26:05 -0000
> @@ -0,0 +1,19 @@
> +- Implement SSE2 lrint() and lrintf()
> +  7a81766b14fa03e97822cf1e0b1651648df13116
> +- use sse2 intrinsics for lrint/lrintf only on windows x64
> +  c01e2405612ad3561bf93e8e6dddb9ba0dffe4d9
> +- sse2 lrint/lrintf updates
> +  c164eaa25ffdeedc7d25e731172cc45a25f483d4
> +
> +Index: examples/audio_out.c
> +--- examples/audio_out.c.orig
> ++++ examples/audio_out.c
> +@@ -960,7 +960,7 @@ solaris_play (get_audio_callback_t callback, AUDIO_OUT
> + 
> +     while ((read_frames = callback (callback_data, float_buffer, BUFFER_LEN 
> / solaris_out->channels)))
> +     {       for (k = 0 ; k < read_frames * solaris_out->channels ; k++)
> +-                    buffer [k] = lrint (32767.0 * float_buffer [k]) ;
> ++                    buffer [k] = psf_lrint (32767.0 * float_buffer [k]) ;
> +             write (solaris_out->fd, buffer, read_frames * 
> solaris_out->channels * sizeof (short)) ;
> +             } ;
> + 
> Index: patches/patch-src_common_h
> ===================================================================
> RCS file: patches/patch-src_common_h
> diff -N patches/patch-src_common_h
> --- /dev/null 1 Jan 1970 00:00:00 -0000
> +++ patches/patch-src_common_h        27 Apr 2024 00:26:05 -0000
> @@ -0,0 +1,98 @@
> +- Implement SSE2 lrint() and lrintf()
> +  7a81766b14fa03e97822cf1e0b1651648df13116
> +- use sse2 intrinsics for lrint/lrintf only on windows x64
> +  c01e2405612ad3561bf93e8e6dddb9ba0dffe4d9
> +- sse2 lrint/lrintf updates
> +  c164eaa25ffdeedc7d25e731172cc45a25f483d4
> +
> +Index: src/common.h
> +--- src/common.h.orig
> ++++ src/common.h
> +@@ -14,6 +14,36 @@
> + #include <stdbool.h>
> + #endif
> + 
> ++#if defined(__x86_64__) || defined(_M_X64)
> ++#   define HAVE_SSE2_INTRINSICS
> ++#elif defined(ENABLE_SSE2_LRINT) && (defined(_M_IX86) || defined(__i386__))
> ++#   if defined(_MSC_VER)
> ++#       define HAVE_SSE2_INTRINSICS
> ++#   elif defined(__clang__)
> ++#       ifdef __SSE2__
> ++#           define HAVE_SSE2_INTRINSICS
> ++#       elif (__has_attribute(target))
> ++#           define HAVE_SSE2_INTRINSICS
> ++#           define USE_TARGET_ATTRIBUTE
> ++#       endif
> ++#   elif defined(__GNUC__)
> ++#       ifdef __SSE2__
> ++#           define HAVE_SSE2_INTRINSICS
> ++#       elif (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 9))
> ++#           define HAVE_SSE2_INTRINSICS
> ++#           define USE_TARGET_ATTRIBUTE
> ++#       endif
> ++#   endif
> ++#endif
> ++
> ++#ifdef HAVE_SSE2_INTRINSICS
> ++#ifdef HAVE_IMMINTRIN_H
> ++#include <immintrin.h>
> ++#else
> ++#include <emmintrin.h>
> ++#endif
> ++#endif /* HAVE_SSE2_INTRINSICS */
> ++
> + #include <math.h>
> + 
> + #ifdef HAVE_VISIBILITY
> +@@ -163,6 +193,41 @@ const char* zoh_get_description (int src_enum) ;
> + SRC_STATE *zoh_state_new (int channels, SRC_ERROR *error) ;
> + 
> + /*----------------------------------------------------------
> ++** SIMD optimized math functions.
> ++*/
> ++
> ++#ifdef HAVE_SSE2_INTRINSICS
> ++static inline int
> ++#ifdef USE_TARGET_ATTRIBUTE
> ++__attribute__((target("sse2")))
> ++#endif
> ++psf_lrintf (float x)
> ++{
> ++    return _mm_cvtss_si32 (_mm_load_ss (&x)) ;
> ++}
> ++static inline int
> ++#ifdef USE_TARGET_ATTRIBUTE
> ++__attribute__((target("sse2")))
> ++#endif
> ++psf_lrint (double x)
> ++{
> ++    return _mm_cvtsd_si32 (_mm_load_sd (&x)) ;
> ++}
> ++
> ++#else
> ++
> ++static inline int psf_lrintf (float x)
> ++{
> ++    return lrintf (x) ;
> ++} /* psf_lrintf */
> ++
> ++static inline int psf_lrint (double x)
> ++{
> ++    return lrint (x) ;
> ++} /* psf_lrint */
> ++#endif
> ++
> ++/*----------------------------------------------------------
> + **  Common static inline functions.
> + */
> + 
> +@@ -170,7 +235,7 @@ static inline double
> + fmod_one (double x)
> + {   double res ;
> + 
> +-    res = x - lrint (x) ;
> ++    res = x - psf_lrint (x) ;
> +     if (res < 0.0)
> +             return res + 1.0 ;
> + 
> Index: patches/patch-src_samplerate_c
> ===================================================================
> RCS file: patches/patch-src_samplerate_c
> diff -N patches/patch-src_samplerate_c
> --- /dev/null 1 Jan 1970 00:00:00 -0000
> +++ patches/patch-src_samplerate_c    27 Apr 2024 00:26:05 -0000
> @@ -0,0 +1,28 @@
> +- Implement SSE2 lrint() and lrintf()
> +  7a81766b14fa03e97822cf1e0b1651648df13116
> +- use sse2 intrinsics for lrint/lrintf only on windows x64
> +  c01e2405612ad3561bf93e8e6dddb9ba0dffe4d9
> +- sse2 lrint/lrintf updates
> +  c164eaa25ffdeedc7d25e731172cc45a25f483d4
> +
> +Index: src/samplerate.c
> +--- src/samplerate.c.orig
> ++++ src/samplerate.c
> +@@ -445,7 +445,7 @@ src_float_to_short_array (const float *in, short *out,
> +             else if (scaled_value <= -32768.f)
> +                     out [i] = -32768 ;
> +             else
> +-                    out [i] = (short) (lrintf (scaled_value)) ;
> ++                    out [i] = (short) (psf_lrintf (scaled_value)) ;
> +     }
> + } /* src_float_to_short_array */
> + 
> +@@ -477,7 +477,7 @@ src_float_to_int_array (const float *in, int *out, int
> +                     continue ;
> +                     } ;
> + #endif
> +-            out [i] = (int) lrint (scaled_value) ;
> ++            out [i] = (int) psf_lrint (scaled_value) ;
> +             } ;
> + 
> + } /* src_float_to_int_array */
> Index: patches/patch-src_src_linear_c
> ===================================================================
> RCS file: patches/patch-src_src_linear_c
> diff -N patches/patch-src_src_linear_c
> --- /dev/null 1 Jan 1970 00:00:00 -0000
> +++ patches/patch-src_src_linear_c    27 Apr 2024 00:26:05 -0000
> @@ -0,0 +1,28 @@
> +- Implement SSE2 lrint() and lrintf()
> +  7a81766b14fa03e97822cf1e0b1651648df13116
> +- use sse2 intrinsics for lrint/lrintf only on windows x64
> +  c01e2405612ad3561bf93e8e6dddb9ba0dffe4d9
> +- sse2 lrint/lrintf updates
> +  c164eaa25ffdeedc7d25e731172cc45a25f483d4
> +
> +Index: src/src_linear.c
> +--- src/src_linear.c.orig
> ++++ src/src_linear.c
> +@@ -102,7 +102,7 @@ linear_vari_process (SRC_STATE *state, SRC_DATA *data)
> +             } ;
> + 
> +     rem = fmod_one (input_index) ;
> +-    priv->in_used += state->channels * lrint (input_index - rem) ;
> ++    priv->in_used += state->channels * psf_lrint (input_index - rem) ;
> +     input_index = rem ;
> + 
> +     /* Main processing loop. */
> +@@ -128,7 +128,7 @@ linear_vari_process (SRC_STATE *state, SRC_DATA *data)
> +             input_index += 1.0 / src_ratio ;
> +             rem = fmod_one (input_index) ;
> + 
> +-            priv->in_used += state->channels * lrint (input_index - rem) ;
> ++            priv->in_used += state->channels * psf_lrint (input_index - 
> rem) ;
> +             input_index = rem ;
> +             } ;
> + 
> Index: patches/patch-src_src_sinc_c
> ===================================================================
> RCS file: patches/patch-src_src_sinc_c
> diff -N patches/patch-src_src_sinc_c
> --- /dev/null 1 Jan 1970 00:00:00 -0000
> +++ patches/patch-src_src_sinc_c      27 Apr 2024 00:26:05 -0000
> @@ -0,0 +1,148 @@
> +- Implement SSE2 lrint() and lrintf()
> +  7a81766b14fa03e97822cf1e0b1651648df13116
> +- use sse2 intrinsics for lrint/lrintf only on windows x64
> +  c01e2405612ad3561bf93e8e6dddb9ba0dffe4d9
> +- sse2 lrint/lrintf updates
> +  c164eaa25ffdeedc7d25e731172cc45a25f483d4
> +
> +Index: src/src_sinc.c
> +--- src/src_sinc.c.orig
> ++++ src/src_sinc.c
> +@@ -132,7 +132,7 @@ static SRC_STATE_VT sinc_mono_state_vt =
> + 
> + static inline increment_t
> + double_to_fp (double x)
> +-{   return (increment_t) (lrint ((x) * FP_ONE)) ;
> ++{   return (increment_t) (psf_lrint ((x) * FP_ONE)) ;
> + } /* double_to_fp */
> + 
> + static inline increment_t
> +@@ -240,7 +240,7 @@ sinc_filter_new (int converter_type, int channels)
> + #endif
> +             }
> + 
> +-            priv->b_len = 3 * (int) lrint ((priv->coeff_half_len + 2.0) / 
> priv->index_inc * SRC_MAX_RATIO + 1) ;
> ++            priv->b_len = 3 * (int) psf_lrint ((priv->coeff_half_len + 2.0) 
> / priv->index_inc * SRC_MAX_RATIO + 1) ;
> +             priv->b_len = MAX (priv->b_len, 4096) ;
> +             priv->b_len *= channels ;
> +             priv->b_len += 1 ; // There is a <= check against 
> samples_in_hand requiring a buffer bigger than the calculation above
> +@@ -458,12 +458,12 @@ sinc_mono_vari_process (SRC_STATE *state, SRC_DATA *da
> +             count /= MIN (state->last_ratio, data->src_ratio) ;
> + 
> +     /* Maximum coefficientson either side of center point. */
> +-    half_filter_chan_len = state->channels * (int) (lrint (count) + 1) ;
> ++    half_filter_chan_len = state->channels * (int) (psf_lrint (count) + 1) ;
> + 
> +     input_index = state->last_position ;
> + 
> +     rem = fmod_one (input_index) ;
> +-    filter->b_current = (filter->b_current + state->channels * lrint 
> (input_index - rem)) % filter->b_len ;
> ++    filter->b_current = (filter->b_current + state->channels * psf_lrint 
> (input_index - rem)) % filter->b_len ;
> +     input_index = rem ;
> + 
> +     terminate = 1.0 / src_ratio + 1e-20 ;
> +@@ -505,7 +505,7 @@ sinc_mono_vari_process (SRC_STATE *state, SRC_DATA *da
> +             input_index += 1.0 / src_ratio ;
> +             rem = fmod_one (input_index) ;
> + 
> +-            filter->b_current = (filter->b_current + state->channels * 
> lrint (input_index - rem)) % filter->b_len ;
> ++            filter->b_current = (filter->b_current + state->channels * 
> psf_lrint (input_index - rem)) % filter->b_len ;
> +             input_index = rem ;
> +             } ;
> + 
> +@@ -614,12 +614,12 @@ sinc_stereo_vari_process (SRC_STATE *state, SRC_DATA *
> +             count /= MIN (state->last_ratio, data->src_ratio) ;
> + 
> +     /* Maximum coefficientson either side of center point. */
> +-    half_filter_chan_len = state->channels * (int) (lrint (count) + 1) ;
> ++    half_filter_chan_len = state->channels * (int) (psf_lrint (count) + 1) ;
> + 
> +     input_index = state->last_position ;
> + 
> +     rem = fmod_one (input_index) ;
> +-    filter->b_current = (filter->b_current + state->channels * lrint 
> (input_index - rem)) % filter->b_len ;
> ++    filter->b_current = (filter->b_current + state->channels * psf_lrint 
> (input_index - rem)) % filter->b_len ;
> +     input_index = rem ;
> + 
> +     terminate = 1.0 / src_ratio + 1e-20 ;
> +@@ -660,7 +660,7 @@ sinc_stereo_vari_process (SRC_STATE *state, SRC_DATA *
> +             input_index += 1.0 / src_ratio ;
> +             rem = fmod_one (input_index) ;
> + 
> +-            filter->b_current = (filter->b_current + state->channels * 
> lrint (input_index - rem)) % filter->b_len ;
> ++            filter->b_current = (filter->b_current + state->channels * 
> psf_lrint (input_index - rem)) % filter->b_len ;
> +             input_index = rem ;
> +             } ;
> + 
> +@@ -770,12 +770,12 @@ sinc_quad_vari_process (SRC_STATE *state, SRC_DATA *da
> +             count /= MIN (state->last_ratio, data->src_ratio) ;
> + 
> +     /* Maximum coefficientson either side of center point. */
> +-    half_filter_chan_len = state->channels * (int) (lrint (count) + 1) ;
> ++    half_filter_chan_len = state->channels * (int) (psf_lrint (count) + 1) ;
> + 
> +     input_index = state->last_position ;
> + 
> +     rem = fmod_one (input_index) ;
> +-    filter->b_current = (filter->b_current + state->channels * lrint 
> (input_index - rem)) % filter->b_len ;
> ++    filter->b_current = (filter->b_current + state->channels * psf_lrint 
> (input_index - rem)) % filter->b_len ;
> +     input_index = rem ;
> + 
> +     terminate = 1.0 / src_ratio + 1e-20 ;
> +@@ -816,7 +816,7 @@ sinc_quad_vari_process (SRC_STATE *state, SRC_DATA *da
> +             input_index += 1.0 / src_ratio ;
> +             rem = fmod_one (input_index) ;
> + 
> +-            filter->b_current = (filter->b_current + state->channels * 
> lrint (input_index - rem)) % filter->b_len ;
> ++            filter->b_current = (filter->b_current + state->channels * 
> psf_lrint (input_index - rem)) % filter->b_len ;
> +             input_index = rem ;
> +             } ;
> + 
> +@@ -925,12 +925,12 @@ sinc_hex_vari_process (SRC_STATE *state, SRC_DATA *dat
> +             count /= MIN (state->last_ratio, data->src_ratio) ;
> + 
> +     /* Maximum coefficientson either side of center point. */
> +-    half_filter_chan_len = state->channels * (int) (lrint (count) + 1) ;
> ++    half_filter_chan_len = state->channels * (int) (psf_lrint (count) + 1) ;
> + 
> +     input_index = state->last_position ;
> + 
> +     rem = fmod_one (input_index) ;
> +-    filter->b_current = (filter->b_current + state->channels * lrint 
> (input_index - rem)) % filter->b_len ;
> ++    filter->b_current = (filter->b_current + state->channels * psf_lrint 
> (input_index - rem)) % filter->b_len ;
> +     input_index = rem ;
> + 
> +     terminate = 1.0 / src_ratio + 1e-20 ;
> +@@ -971,7 +971,7 @@ sinc_hex_vari_process (SRC_STATE *state, SRC_DATA *dat
> +             input_index += 1.0 / src_ratio ;
> +             rem = fmod_one (input_index) ;
> + 
> +-            filter->b_current = (filter->b_current + state->channels * 
> lrint (input_index - rem)) % filter->b_len ;
> ++            filter->b_current = (filter->b_current + state->channels * 
> psf_lrint (input_index - rem)) % filter->b_len ;
> +             input_index = rem ;
> +             } ;
> + 
> +@@ -1090,12 +1090,12 @@ sinc_multichan_vari_process (SRC_STATE *state, 
> SRC_DAT
> +             count /= MIN (state->last_ratio, data->src_ratio) ;
> + 
> +     /* Maximum coefficientson either side of center point. */
> +-    half_filter_chan_len = state->channels * (int) (lrint (count) + 1) ;
> ++    half_filter_chan_len = state->channels * (int) (psf_lrint (count) + 1) ;
> + 
> +     input_index = state->last_position ;
> + 
> +     rem = fmod_one (input_index) ;
> +-    filter->b_current = (filter->b_current + state->channels * lrint 
> (input_index - rem)) % filter->b_len ;
> ++    filter->b_current = (filter->b_current + state->channels * psf_lrint 
> (input_index - rem)) % filter->b_len ;
> +     input_index = rem ;
> + 
> +     terminate = 1.0 / src_ratio + 1e-20 ;
> +@@ -1136,7 +1136,7 @@ sinc_multichan_vari_process (SRC_STATE *state, SRC_DAT
> +             input_index += 1.0 / src_ratio ;
> +             rem = fmod_one (input_index) ;
> + 
> +-            filter->b_current = (filter->b_current + state->channels * 
> lrint (input_index - rem)) % filter->b_len ;
> ++            filter->b_current = (filter->b_current + state->channels * 
> psf_lrint (input_index - rem)) % filter->b_len ;
> +             input_index = rem ;
> +             } ;
> + 
> Index: patches/patch-src_src_zoh_c
> ===================================================================
> RCS file: patches/patch-src_src_zoh_c
> diff -N patches/patch-src_src_zoh_c
> --- /dev/null 1 Jan 1970 00:00:00 -0000
> +++ patches/patch-src_src_zoh_c       27 Apr 2024 00:26:05 -0000
> @@ -0,0 +1,28 @@
> +- Implement SSE2 lrint() and lrintf()
> +  7a81766b14fa03e97822cf1e0b1651648df13116
> +- use sse2 intrinsics for lrint/lrintf only on windows x64
> +  c01e2405612ad3561bf93e8e6dddb9ba0dffe4d9
> +- sse2 lrint/lrintf updates
> +  c164eaa25ffdeedc7d25e731172cc45a25f483d4
> +
> +Index: src/src_zoh.c
> +--- src/src_zoh.c.orig
> ++++ src/src_zoh.c
> +@@ -99,7 +99,7 @@ zoh_vari_process (SRC_STATE *state, SRC_DATA *data)
> +             } ;
> + 
> +     rem = fmod_one (input_index) ;
> +-    priv->in_used += state->channels * lrint (input_index - rem) ;
> ++    priv->in_used += state->channels * psf_lrint (input_index - rem) ;
> +     input_index = rem ;
> + 
> +     /* Main processing loop. */
> +@@ -117,7 +117,7 @@ zoh_vari_process (SRC_STATE *state, SRC_DATA *data)
> +             input_index += 1.0 / src_ratio ;
> +             rem = fmod_one (input_index) ;
> + 
> +-            priv->in_used += state->channels * lrint (input_index - rem) ;
> ++            priv->in_used += state->channels * psf_lrint (input_index - 
> rem) ;
> +             input_index = rem ;
> +             } ;
> + 
> 
> 

Reply via email to