Re: [FFmpeg-devel] [PATCH 4/4] avcodec/cbrt_tablegen: speed up dynamic table creation

2015-12-01 Thread Ganesh Ajjanagadde
On Sat, Nov 28, 2015 at 12:03 PM, Ganesh Ajjanagadde wrote:
> On systems having cbrt, there is no reason to use the slow pow function.
>
> Sample benchmark (x86-64, Haswell, GNU/Linux):
> new:
> 5124920 decicycles in cbrt_tableinit,   1 runs,  0 skips
>
> old:
> 12321680 decicycles in cbrt_tableinit,   1 runs,  0 skips
>
> Reviewed-by: Ronald S. Bultje 
> Signed-off-by: Ganesh Ajjanagadde 
> ---
>  libavcodec/cbrt_tablegen.h  | 5 ++---
>  libavcodec/cbrt_tablegen_template.c | 1 +
>  2 files changed, 3 insertions(+), 3 deletions(-)
>
> diff --git a/libavcodec/cbrt_tablegen.h b/libavcodec/cbrt_tablegen.h
> index 27a3e3a..59b5a1d 100644
> --- a/libavcodec/cbrt_tablegen.h
> +++ b/libavcodec/cbrt_tablegen.h
> @@ -29,7 +29,7 @@
>  #include "libavcodec/aac_defines.h"
>
>  #if USE_FIXED
> -#define CBRT(x) (int)floor((x).f * 8192 + 0.5)
> +#define CBRT(x) lrint((x).f * 8192)
>  #else
>  #define CBRT(x) x.i
>  #endif
> @@ -49,13 +49,12 @@ static av_cold void AAC_RENAME(cbrt_tableinit)(void)
>  {
>  if (!cbrt_tab[(1<<13) - 1]) {
>  int i;
> -/* cbrtf() isn't available on all systems, so we use powf(). */
>  for (i = 0; i < 1<<13; i++) {
>  union {
>  float f;
>  uint32_t i;
>  } f;
> -f.f = pow(i, 1.0 / 3.0) * i;
> +f.f = cbrt(i) * i;
>  cbrt_tab[i] = CBRT(f);
>  }
>  }
> diff --git a/libavcodec/cbrt_tablegen_template.c b/libavcodec/cbrt_tablegen_template.c
> index 9dd2cf5..1d71d34 100644
> --- a/libavcodec/cbrt_tablegen_template.c
> +++ b/libavcodec/cbrt_tablegen_template.c
> @@ -23,6 +23,7 @@
>  #include <stdlib.h>
>  #define CONFIG_HARDCODED_TABLES 0
>  #include "cbrt_tablegen.h"
> +#include "libavutil/tablegen.h"
>  #include "tableprint.h"
>
>  int main(void)
> --
> 2.6.2
>

pushed, identical stand-alone patch reviewed by Ronald.
___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


[FFmpeg-devel] [PATCH 4/4] avcodec/cbrt_tablegen: speed up dynamic table creation

2015-11-28 Thread Ganesh Ajjanagadde
On systems having cbrt, there is no reason to use the slow pow function.

Sample benchmark (x86-64, Haswell, GNU/Linux):
new:
5124920 decicycles in cbrt_tableinit,   1 runs,  0 skips

old:
12321680 decicycles in cbrt_tableinit,   1 runs,  0 skips

Reviewed-by: Ronald S. Bultje 
Signed-off-by: Ganesh Ajjanagadde 
---
 libavcodec/cbrt_tablegen.h  | 5 ++---
 libavcodec/cbrt_tablegen_template.c | 1 +
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/libavcodec/cbrt_tablegen.h b/libavcodec/cbrt_tablegen.h
index 27a3e3a..59b5a1d 100644
--- a/libavcodec/cbrt_tablegen.h
+++ b/libavcodec/cbrt_tablegen.h
@@ -29,7 +29,7 @@
 #include "libavcodec/aac_defines.h"
 
 #if USE_FIXED
-#define CBRT(x) (int)floor((x).f * 8192 + 0.5)
+#define CBRT(x) lrint((x).f * 8192)
 #else
 #define CBRT(x) x.i
 #endif
@@ -49,13 +49,12 @@ static av_cold void AAC_RENAME(cbrt_tableinit)(void)
 {
 if (!cbrt_tab[(1<<13) - 1]) {
 int i;
-/* cbrtf() isn't available on all systems, so we use powf(). */
 for (i = 0; i < 1<<13; i++) {
 union {
 float f;
 uint32_t i;
 } f;
-f.f = pow(i, 1.0 / 3.0) * i;
+f.f = cbrt(i) * i;
 cbrt_tab[i] = CBRT(f);
 }
 }
diff --git a/libavcodec/cbrt_tablegen_template.c b/libavcodec/cbrt_tablegen_template.c
index 9dd2cf5..1d71d34 100644
--- a/libavcodec/cbrt_tablegen_template.c
+++ b/libavcodec/cbrt_tablegen_template.c
@@ -23,6 +23,7 @@
 #include <stdlib.h>
 #define CONFIG_HARDCODED_TABLES 0
 #include "cbrt_tablegen.h"
+#include "libavutil/tablegen.h"
 #include "tableprint.h"
 
 int main(void)
-- 
2.6.2

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel