Allow av_malloc() to allocate memory that can be mapped into the GPU address space. This requires allocations to start on page boundaries. For video memory buffers, it also requires a minimum stride alignment of 64 bytes.
Option 1: use heuristics in av_malloc() - break down into mem, frame, and avcodec changes. Option 2: use a finer decision model - mem: add av_malloc_aligned() - buffer: add av_buffer_alloc2() with align and/or flags - frame/avcodec: use new APIs Signed-off-by: Gwenole Beauchesne <gwenole.beauche...@intel.com> --- configure | 1 + libavcodec/internal.h | 4 +++- libavutil/mem.c | 42 +++++++++++++++++++++++++++++++++++++++--- 3 files changed, 43 insertions(+), 4 deletions(-) diff --git a/configure b/configure index a38b290..25081a8 100755 --- a/configure +++ b/configure @@ -6262,6 +6262,7 @@ cat > $TMPH <<EOF #define BUILDSUF "$build_suffix" #define SLIBSUF "$SLIBSUF" #define HAVE_MMX2 HAVE_MMXEXT +#define HAVE_GPU (CONFIG_OPENCL || CONFIG_VAAPI) #define SWS_MAX_FILTER_SIZE $sws_max_filter_size EOF diff --git a/libavcodec/internal.h b/libavcodec/internal.h index 0abe17f..e12e01a 100644 --- a/libavcodec/internal.h +++ b/libavcodec/internal.h @@ -69,7 +69,9 @@ #define FF_SIGNBIT(x) ((x) >> CHAR_BIT * sizeof(x) - 1) -#if HAVE_AVX +#if HAVE_GPU +# define STRIDE_ALIGN 64 +#elif HAVE_AVX # define STRIDE_ALIGN 32 #elif HAVE_SIMD_ALIGN_16 # define STRIDE_ALIGN 16 diff --git a/libavutil/mem.c b/libavutil/mem.c index 323b183..b707d7e 100644 --- a/libavutil/mem.c +++ b/libavutil/mem.c @@ -35,6 +35,9 @@ #if HAVE_MALLOC_H #include <malloc.h> #endif +#if HAVE_UNISTD_H +#include <unistd.h> +#endif #include "avassert.h" #include "avutil.h" @@ -61,7 +64,33 @@ void free(void *ptr); #include "mem_internal.h" -#define ALIGN (HAVE_AVX ? 32 : 16) +#define ALIGN (HAVE_GPU ? 64 : (HAVE_AVX ? 
32 : 16)) + +static size_t get_page_size(void) +{ + static size_t page_size; + + if (!page_size) { +#ifdef HAVE_UNISTD_H + page_size = getpagesize(); +#else + page_size = 4096; +#endif + } + return page_size; +} + +static void get_malloc_props(size_t *size_ptr, size_t *align_ptr) +{ + size_t align = ALIGN; + + /* Heuristic: use GPU mappable buffers for at least CIF resolutions */ + if (HAVE_GPU && *size_ptr > 288 * FFALIGN(352, align)) { + align = get_page_size(); + *size_ptr = FFALIGN(*size_ptr, align); + } + *align_ptr = align; +} /* NOTE: if you want to override these functions with your own * implementations (not recommended) you have to link libav* as @@ -80,11 +109,18 @@ void *av_malloc(size_t size) #if CONFIG_MEMALIGN_HACK long diff; #endif +#if HAVE_POSIX_MEMALIGN || HAVE_MEMALIGN + size_t align; +#endif /* let's disallow possibly ambiguous cases */ if (size > (max_alloc_size - 32)) return NULL; +#if HAVE_POSIX_MEMALIGN || HAVE_MEMALIGN + get_malloc_props(&size, &align); +#endif + #if CONFIG_MEMALIGN_HACK ptr = malloc(size + ALIGN); if (!ptr) @@ -94,13 +130,13 @@ void *av_malloc(size_t size) ((char *)ptr)[-1] = diff; #elif HAVE_POSIX_MEMALIGN if (size) //OS X on SDK 10.6 has a broken posix_memalign implementation - if (posix_memalign(&ptr, ALIGN, size)) + if (posix_memalign(&ptr, align, size)) ptr = NULL; #elif HAVE_ALIGNED_MALLOC ptr = _aligned_malloc(size, ALIGN); #elif HAVE_MEMALIGN #ifndef __DJGPP__ - ptr = memalign(ALIGN, size); + ptr = memalign(align, size); #else ptr = memalign(size, ALIGN); #endif -- 1.9.1 _______________________________________________ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org http://ffmpeg.org/mailman/listinfo/ffmpeg-devel