Allow av_malloc() to return memory that can be mapped into the GPU
address space. This requires allocations to start on page boundaries.
For video memory buffers, it additionally requires strides to be
aligned to at least 64 bytes.

Option 1: use heuristics in av_malloc()
- split the work into separate mem, frame, and avcodec changes.

Option 2: use a finer decision model
- mem: add av_malloc_aligned()
- buffer: add av_buffer_alloc2() with align and/or flags
- frame/avcodec: use new APIs

Signed-off-by: Gwenole Beauchesne <gwenole.beauche...@intel.com>
---
 configure             |  1 +
 libavcodec/internal.h |  4 +++-
 libavutil/mem.c       | 42 +++++++++++++++++++++++++++++++++++++++---
 3 files changed, 43 insertions(+), 4 deletions(-)

diff --git a/configure b/configure
index a38b290..25081a8 100755
--- a/configure
+++ b/configure
@@ -6262,6 +6262,7 @@ cat > $TMPH <<EOF
 #define BUILDSUF "$build_suffix"
 #define SLIBSUF "$SLIBSUF"
 #define HAVE_MMX2 HAVE_MMXEXT
+#define HAVE_GPU (CONFIG_OPENCL || CONFIG_VAAPI)
 #define SWS_MAX_FILTER_SIZE $sws_max_filter_size
 EOF
 
diff --git a/libavcodec/internal.h b/libavcodec/internal.h
index 0abe17f..e12e01a 100644
--- a/libavcodec/internal.h
+++ b/libavcodec/internal.h
@@ -69,7 +69,9 @@
 
 #define FF_SIGNBIT(x) ((x) >> CHAR_BIT * sizeof(x) - 1)
 
-#if HAVE_AVX
+#if HAVE_GPU
+#   define STRIDE_ALIGN 64
+#elif HAVE_AVX
 #   define STRIDE_ALIGN 32
 #elif HAVE_SIMD_ALIGN_16
 #   define STRIDE_ALIGN 16
diff --git a/libavutil/mem.c b/libavutil/mem.c
index 323b183..b707d7e 100644
--- a/libavutil/mem.c
+++ b/libavutil/mem.c
@@ -35,6 +35,9 @@
 #if HAVE_MALLOC_H
 #include <malloc.h>
 #endif
+#if HAVE_UNISTD_H
+#include <unistd.h>
+#endif
 
 #include "avassert.h"
 #include "avutil.h"
@@ -61,7 +64,33 @@ void  free(void *ptr);
 
 #include "mem_internal.h"
 
-#define ALIGN (HAVE_AVX ? 32 : 16)
+#define ALIGN (HAVE_GPU ? 64 : (HAVE_AVX ? 32 : 16))
+
+static size_t get_page_size(void)
+{
+    /* Returns the page size in bytes; queried once, then cached (0 = unset). */
+    static size_t page_size;
+    if (!page_size) {
+#if HAVE_UNISTD_H /* test the value, like the guard on the <unistd.h> include */
+        page_size = getpagesize();
+#else
+        page_size = 4096; /* fallback when <unistd.h> was not included */
+#endif
+    }
+    return page_size;
+}
+
+static void get_malloc_props(size_t *size_ptr, size_t *align_ptr)
+{
+    size_t align = ALIGN; /* alignment reported unless the heuristic fires */
+    /* Chooses the alignment for a request of *size_ptr bytes (may grow it).
+     * Heuristic: use GPU mappable buffers for at least CIF resolutions */
+    if (HAVE_GPU && *size_ptr > 288 * FFALIGN(352, align)) { /* CIF: 352x288 */
+        align = get_page_size(); /* switch to page alignment */
+        *size_ptr = FFALIGN(*size_ptr, align); /* presumably rounds up to pages */
+    }
+    *align_ptr = align; /* out: alignment chosen for this request */
+}
 
 /* NOTE: if you want to override these functions with your own
  * implementations (not recommended) you have to link libav* as
@@ -80,11 +109,18 @@ void *av_malloc(size_t size)
 #if CONFIG_MEMALIGN_HACK
     long diff;
 #endif
+#if HAVE_POSIX_MEMALIGN || HAVE_MEMALIGN
+    size_t align;
+#endif
 
     /* let's disallow possibly ambiguous cases */
     if (size > (max_alloc_size - 32))
         return NULL;
 
+#if HAVE_POSIX_MEMALIGN || HAVE_MEMALIGN
+    get_malloc_props(&size, &align);
+#endif
+
 #if CONFIG_MEMALIGN_HACK
     ptr = malloc(size + ALIGN);
     if (!ptr)
@@ -94,13 +130,13 @@ void *av_malloc(size_t size)
     ((char *)ptr)[-1] = diff;
 #elif HAVE_POSIX_MEMALIGN
     if (size) //OS X on SDK 10.6 has a broken posix_memalign implementation
-    if (posix_memalign(&ptr, ALIGN, size))
+    if (posix_memalign(&ptr, align, size))
         ptr = NULL;
 #elif HAVE_ALIGNED_MALLOC
     ptr = _aligned_malloc(size, ALIGN);
 #elif HAVE_MEMALIGN
 #ifndef __DJGPP__
-    ptr = memalign(ALIGN, size);
+    ptr = memalign(align, size);
 #else
     ptr = memalign(size, ALIGN);
 #endif
-- 
1.9.1

_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel

Reply via email to