Enabling CUDA support adds some extremely useful features, but it also adds a hard runtime dependency on the NVIDIA driver. This commit removes that dependency; the driver library is now loaded on demand, when it is first required. This allows the same CUDA-enabled FFmpeg build to be used on machines with and without NVIDIA cards. The CUDA toolkit is still needed at build time. The scale_npp filter still adds a runtime dependency on libnppc and libnppi (which is fine, since these libraries have to be redistributed with FFmpeg anyway, and they load CUDA internally on demand as well). --- The same patch was sent to the libav ML
configure | 9 ++-- libavcodec/nvenc.c | 24 ++++++---- libavfilter/vf_hwupload_cuda.c | 22 ++++++--- libavfilter/vf_scale_npp.c | 12 ++++- libavutil/Makefile | 4 +- libavutil/cuda_api.c | 102 +++++++++++++++++++++++++++++++++++++++++ libavutil/cuda_api.h | 53 +++++++++++++++++++++ libavutil/hwcontext_cuda.c | 36 +++++++++------ 8 files changed, 227 insertions(+), 35 deletions(-) create mode 100644 libavutil/cuda_api.c create mode 100644 libavutil/cuda_api.h
diff --git a/configure b/configure index cc2c9e7..ab2d9c9 100755 --- a/configure +++ b/configure @@ -157,7 +157,7 @@ Hardware accelerators: --disable-vdpau disable VDPAU code [autodetect] Hardware-accelerated decoding/encoding: - --enable-cuda enable dynamically linked CUDA [no] + --enable-cuda enable CUDA support [no] --enable-libmfx enable HW acceleration through libmfx --enable-mmal enable decoding via MMAL [no] --enable-nvenc enable NVIDIA NVENC support [no] @@ -5577,8 +5577,11 @@ enabled avfoundation_indev && { check_lib2 CoreGraphics/CoreGraphics.h CGGetActi enabled avisynth && { { check_lib2 "windows.h" LoadLibrary; } || { check_lib2 "dlfcn.h" dlopen -ldl; } || die "ERROR: LoadLibrary/dlopen not found for avisynth"; } -enabled cuda && { check_lib cuda.h cuInit -lcuda || - die "ERROR: CUDA not found"; } +enabled cuda && { check_header cuda.h || + die "ERROR: CUDA toolkit not found"; } && + { { check_lib2 "windows.h" LoadLibrary; } || + { check_lib2 "dlfcn.h" dlopen -ldl; } || + die "ERROR: LoadLibrary/dlopen not found for CUDA"; } enabled chromaprint && require chromaprint chromaprint.h chromaprint_get_version -lchromaprint enabled coreimage_filter && { check_header_objcc QuartzCore/CoreImage.h || disable coreimage_filter; } enabled coreimagesrc_filter && { check_header_objcc QuartzCore/CoreImage.h || disable coreimagesrc_filter; } diff --git a/libavcodec/nvenc.c b/libavcodec/nvenc.c index 73d0584..4ce45b3 100644 --- a/libavcodec/nvenc.c +++ b/libavcodec/nvenc.c @@ -40,8 +40,8 @@ #if CONFIG_CUDA -#include <cuda.h> #include "libavutil/hwcontext_cuda.h" +#include "libavutil/cuda_api.h" #else #if defined(_WIN32) @@ -330,14 +330,20 @@ static av_cold int nvenc_dyload_cuda(AVCodecContext *avctx) NvencDynLoadFunctions *dl_fn = &ctx->nvenc_dload_funcs; #if CONFIG_CUDA - dl_fn->cu_init = cuInit; - dl_fn->cu_device_get_count = cuDeviceGetCount; - dl_fn->cu_device_get = cuDeviceGet; - dl_fn->cu_device_get_name = cuDeviceGetName; - dl_fn->cu_device_compute_capability = 
cuDeviceComputeCapability; - dl_fn->cu_ctx_create = cuCtxCreate_v2; - dl_fn->cu_ctx_pop_current = cuCtxPopCurrent_v2; - dl_fn->cu_ctx_destroy = cuCtxDestroy_v2; + AVCudaFunctions* api = avpriv_load_cuda(); + if (!api) { + av_log(avctx, AV_LOG_FATAL, "Failed loading CUDA library\n"); + return 0; + } + + dl_fn->cu_init = api->cuInit; + dl_fn->cu_device_get_count = api->cuDeviceGetCount; + dl_fn->cu_device_get = api->cuDeviceGet; + dl_fn->cu_device_get_name = api->cuDeviceGetName; + dl_fn->cu_device_compute_capability = api->cuDeviceComputeCapability; + dl_fn->cu_ctx_create = api->cuCtxCreate; + dl_fn->cu_ctx_pop_current = api->cuCtxPopCurrent; + dl_fn->cu_ctx_destroy = api->cuCtxDestroy; return 1; #else diff --git a/libavfilter/vf_hwupload_cuda.c b/libavfilter/vf_hwupload_cuda.c index c22221c..f78e219 100644 --- a/libavfilter/vf_hwupload_cuda.c +++ b/libavfilter/vf_hwupload_cuda.c @@ -19,6 +19,7 @@ #include "libavutil/buffer.h" #include "libavutil/hwcontext.h" #include "libavutil/hwcontext_cuda.h" +#include "libavutil/cuda_api.h" #include "libavutil/log.h" #include "libavutil/opt.h" @@ -29,6 +30,7 @@ typedef struct CudaUploadContext { const AVClass *class; + AVCudaFunctions *api; int device_idx; AVBufferRef *hwdevice; @@ -38,7 +40,8 @@ typedef struct CudaUploadContext { static void cudaupload_ctx_free(AVHWDeviceContext *ctx) { AVCUDADeviceContext *hwctx = ctx->hwctx; - cuCtxDestroy(hwctx->cuda_ctx); + AVCudaFunctions *api = ctx->user_opaque; + api->cuCtxDestroy(hwctx->cuda_ctx); } static av_cold int cudaupload_init(AVFilterContext *ctx) @@ -52,34 +55,41 @@ static av_cold int cudaupload_init(AVFilterContext *ctx) CUresult err; int ret; - err = cuInit(0); + s->api = avpriv_load_cuda(); + if (!s->api) { + av_log(ctx, AV_LOG_ERROR, "Could not load the CUDA driver API\n"); + return AVERROR_UNKNOWN; + } + + err = s->api->cuInit(0); if (err != CUDA_SUCCESS) { av_log(ctx, AV_LOG_ERROR, "Could not initialize the CUDA driver API\n"); return AVERROR_UNKNOWN; } - err = 
cuDeviceGet(&device, s->device_idx); + err = s->api->cuDeviceGet(&device, s->device_idx); if (err != CUDA_SUCCESS) { av_log(ctx, AV_LOG_ERROR, "Could not get the device number %d\n", s->device_idx); return AVERROR_UNKNOWN; } - err = cuCtxCreate(&cuda_ctx, 0, device); + err = s->api->cuCtxCreate(&cuda_ctx, 0, device); if (err != CUDA_SUCCESS) { av_log(ctx, AV_LOG_ERROR, "Error creating a CUDA context\n"); return AVERROR_UNKNOWN; } - cuCtxPopCurrent(&dummy); + s->api->cuCtxPopCurrent(&dummy); s->hwdevice = av_hwdevice_ctx_alloc(AV_HWDEVICE_TYPE_CUDA); if (!s->hwdevice) { - cuCtxDestroy(cuda_ctx); + s->api->cuCtxDestroy(cuda_ctx); return AVERROR(ENOMEM); } device_ctx = (AVHWDeviceContext*)s->hwdevice->data; device_ctx->free = cudaupload_ctx_free; + device_ctx->user_opaque = (void*)s->api; device_hwctx = device_ctx->hwctx; device_hwctx->cuda_ctx = cuda_ctx; diff --git a/libavfilter/vf_scale_npp.c b/libavfilter/vf_scale_npp.c index 68cee39..d9a9d86 100644 --- a/libavfilter/vf_scale_npp.c +++ b/libavfilter/vf_scale_npp.c @@ -30,6 +30,7 @@ #include "libavutil/eval.h" #include "libavutil/hwcontext.h" #include "libavutil/hwcontext_cuda.h" +#include "libavutil/cuda_api.h" #include "libavutil/internal.h" #include "libavutil/mathematics.h" #include "libavutil/opt.h" @@ -100,6 +101,7 @@ typedef struct NPPScaleStageContext { typedef struct NPPScaleContext { const AVClass *class; + AVCudaFunctions *api; NPPScaleStageContext stages[STAGE_NB]; AVFrame *tmp_frame; int passthrough; @@ -130,6 +132,12 @@ static int nppscale_init(AVFilterContext *ctx) NPPScaleContext *s = ctx->priv; int i; + s->api = avpriv_load_cuda(); + if (!s->api) { + av_log(ctx, AV_LOG_FATAL, "CUDA API is not available\n"); + return AVERROR(ENOSYS); + } + if (!strcmp(s->format_str, "same")) { s->format = AV_PIX_FMT_NONE; } else { @@ -579,7 +587,7 @@ static int nppscale_filter_frame(AVFilterLink *link, AVFrame *in) (int64_t)in->sample_aspect_ratio.den * outlink->w * link->h, INT_MAX); - err = 
cuCtxPushCurrent(device_hwctx->cuda_ctx); + err = s->api->cuCtxPushCurrent(device_hwctx->cuda_ctx); if (err != CUDA_SUCCESS) { ret = AVERROR_UNKNOWN; goto fail; @@ -587,7 +595,7 @@ static int nppscale_filter_frame(AVFilterLink *link, AVFrame *in) ret = nppscale_scale(ctx, out, in); - cuCtxPopCurrent(&dummy); + s->api->cuCtxPopCurrent(&dummy); if (ret < 0) goto fail; diff --git a/libavutil/Makefile b/libavutil/Makefile index a35deb6..469e074 100644 --- a/libavutil/Makefile +++ b/libavutil/Makefile @@ -151,7 +151,7 @@ OBJS = adler32.o \ OBJS-$(!HAVE_ATOMICS_NATIVE) += atomic.o \ -OBJS-$(CONFIG_CUDA) += hwcontext_cuda.o +OBJS-$(CONFIG_CUDA) += hwcontext_cuda.o cuda_api.o OBJS-$(CONFIG_LZO) += lzo.o OBJS-$(CONFIG_OPENCL) += opencl.o opencl_internal.o OBJS-$(CONFIG_VAAPI) += hwcontext_vaapi.o @@ -162,7 +162,7 @@ OBJS += $(COMPAT_OBJS:%=../compat/%) # Windows resource file SLIBOBJS-$(HAVE_GNU_WINDRES) += avutilres.o -SKIPHEADERS-$(CONFIG_CUDA) += hwcontext_cuda.h +SKIPHEADERS-$(CONFIG_CUDA) += hwcontext_cuda.h cuda_api.h SKIPHEADERS-$(CONFIG_VAAPI) += hwcontext_vaapi.h SKIPHEADERS-$(CONFIG_VDPAU) += hwcontext_vdpau.h SKIPHEADERS-$(HAVE_ATOMICS_GCC) += atomic_gcc.h diff --git a/libavutil/cuda_api.c b/libavutil/cuda_api.c new file mode 100644 index 0000000..2657b36 --- /dev/null +++ b/libavutil/cuda_api.c @@ -0,0 +1,102 @@ +/* + * CUDA driver API loader + * Copyright (c) 2016 Andrey Turkin <andrey.tur...@gmail.com> + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#if defined(_WIN32) +#include <windows.h> +#else +#include <dlfcn.h> +#endif + +#include "cuda_api.h" +#include "internal.h" +#include "thread.h" + +#if defined(_WIN32) +#define LOAD_FUNC(l, s) GetProcAddress(l, s) +#define DL_CLOSE_FUNC(l) FreeLibrary(l) +#else +#define LOAD_FUNC(l, s) dlsym(l, s) +#define DL_CLOSE_FUNC(l) dlclose(l) +#endif + +#if defined(_WIN32) +static HMODULE cuda_lib; +#else +static void* cuda_lib; +#endif + +static struct AVCudaFunctions api_table; + +static AVOnce cuda_api_init = AV_ONCE_INIT; + +#define CHECK_LOAD_FUNC_(f, s) \ +do { \ + api_table.f = (void*)LOAD_FUNC(cuda_lib, s); \ + if (!api_table.f) { \ + av_log(NULL, AV_LOG_FATAL, "Failed loading %s from CUDA library\n", s); \ + goto error; \ + } \ +} while (0) + +#define CHECK_LOAD_FUNC(f) CHECK_LOAD_FUNC_(f, #f) +#define CHECK_LOAD_FUNC_V2(f) CHECK_LOAD_FUNC_(f, #f "_v2") + +static av_cold void dyload_cuda(void) +{ +#if defined(_WIN32) + cuda_lib = LoadLibrary(TEXT("nvcuda.dll")); +#else + cuda_lib = dlopen("libcuda.so", RTLD_LAZY); +#endif + + if (!cuda_lib) { + av_log(NULL, AV_LOG_ERROR, "Failed loading CUDA library\n"); + return; + } + + CHECK_LOAD_FUNC(cuInit); + CHECK_LOAD_FUNC(cuDeviceGetCount); + CHECK_LOAD_FUNC(cuDeviceGet); + CHECK_LOAD_FUNC(cuDeviceGetName); + CHECK_LOAD_FUNC(cuDeviceComputeCapability); + CHECK_LOAD_FUNC_V2(cuCtxCreate); + CHECK_LOAD_FUNC_V2(cuCtxPushCurrent); + CHECK_LOAD_FUNC_V2(cuCtxPopCurrent); + CHECK_LOAD_FUNC_V2(cuCtxDestroy); + CHECK_LOAD_FUNC_V2(cuMemAlloc); + CHECK_LOAD_FUNC_V2(cuMemcpy2D); + CHECK_LOAD_FUNC_V2(cuMemFree); + + return; + +error: + DL_CLOSE_FUNC(cuda_lib); + cuda_lib = NULL; +} + +av_cold AVCudaFunctions *avpriv_load_cuda(void) +{ + if (ff_thread_once(&cuda_api_init, dyload_cuda) != 0) { + av_log(NULL, 
AV_LOG_FATAL, "pthread_once has failed"); + return NULL; + } + return cuda_lib ? &api_table : NULL; +} diff --git a/libavutil/cuda_api.h b/libavutil/cuda_api.h new file mode 100644 index 0000000..576bc4c --- /dev/null +++ b/libavutil/cuda_api.h @@ -0,0 +1,53 @@ +/* + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + + +#ifndef AVUTIL_CUDA_API_H +#define AVUTIL_CUDA_API_H + +#include <cuda.h> + +/** + * @file + * Dynamic CUDA driver API loader. 
+ */ + + +typedef const struct AVCudaFunctions +{ + CUresult (CUDAAPI *cuInit)(unsigned int Flags); + CUresult (CUDAAPI *cuDeviceGetCount)(int *count); + CUresult (CUDAAPI *cuDeviceGet)(CUdevice *device, int ordinal); + CUresult (CUDAAPI *cuDeviceGetName)(char *name, int len, CUdevice dev); + CUresult (CUDAAPI *cuDeviceComputeCapability)(int *major, int *minor, CUdevice dev); + CUresult (CUDAAPI *cuCtxCreate)(CUcontext *pctx, unsigned int flags, CUdevice dev); + CUresult (CUDAAPI *cuCtxPushCurrent)(CUcontext ctx); + CUresult (CUDAAPI *cuCtxPopCurrent)(CUcontext *pctx); + CUresult (CUDAAPI *cuCtxDestroy)(CUcontext ctx); + CUresult (CUDAAPI *cuMemAlloc)(CUdeviceptr *dptr, unsigned int bytesize); + CUresult (CUDAAPI *cuMemcpy2D)(const CUDA_MEMCPY2D *pCopy); + CUresult (CUDAAPI *cuMemFree)(CUdeviceptr dptr); +} AVCudaFunctions; + +/** + * Loads CUDA driver API if available + * @return Pointer to an API functions table or NULL if API is not available + */ +AVCudaFunctions* avpriv_load_cuda(void); + +#endif /* AVUTIL_CUDA_API_H */ diff --git a/libavutil/hwcontext_cuda.c b/libavutil/hwcontext_cuda.c index 2c5980d..8f74b75 100644 --- a/libavutil/hwcontext_cuda.c +++ b/libavutil/hwcontext_cuda.c @@ -21,11 +21,13 @@ #include "hwcontext.h" #include "hwcontext_internal.h" #include "hwcontext_cuda.h" +#include "cuda_api.h" #include "mem.h" #include "pixdesc.h" #include "pixfmt.h" typedef struct CUDAFramesContext { + AVCudaFunctions *api; int shift_width, shift_height; } CUDAFramesContext; @@ -39,44 +41,46 @@ static void cuda_buffer_free(void *opaque, uint8_t *data) { AVHWFramesContext *ctx = opaque; AVCUDADeviceContext *hwctx = ctx->device_ctx->hwctx; + CUDAFramesContext *priv = ctx->internal->priv; CUcontext dummy; - cuCtxPushCurrent(hwctx->cuda_ctx); + priv->api->cuCtxPushCurrent(hwctx->cuda_ctx); - cuMemFree((CUdeviceptr)data); + priv->api->cuMemFree((CUdeviceptr)data); - cuCtxPopCurrent(&dummy); + priv->api->cuCtxPopCurrent(&dummy); } static AVBufferRef 
*cuda_pool_alloc(void *opaque, int size) { AVHWFramesContext *ctx = opaque; AVCUDADeviceContext *hwctx = ctx->device_ctx->hwctx; + CUDAFramesContext *priv = ctx->internal->priv; AVBufferRef *ret = NULL; CUcontext dummy = NULL; CUdeviceptr data; CUresult err; - err = cuCtxPushCurrent(hwctx->cuda_ctx); + err = priv->api->cuCtxPushCurrent(hwctx->cuda_ctx); if (err != CUDA_SUCCESS) { av_log(ctx, AV_LOG_ERROR, "Error setting current CUDA context\n"); return NULL; } - err = cuMemAlloc(&data, size); + err = priv->api->cuMemAlloc(&data, size); if (err != CUDA_SUCCESS) goto fail; ret = av_buffer_create((uint8_t*)data, size, cuda_buffer_free, ctx, 0); if (!ret) { - cuMemFree(data); + priv->api->cuMemFree(data); goto fail; } fail: - cuCtxPopCurrent(&dummy); + priv->api->cuCtxPopCurrent(&dummy); return ret; } @@ -85,6 +89,12 @@ static int cuda_frames_init(AVHWFramesContext *ctx) CUDAFramesContext *priv = ctx->internal->priv; int i; + priv->api = avpriv_load_cuda(); + if (!priv->api) { + av_log(ctx, AV_LOG_FATAL, "CUDA API is not available\n"); + return AVERROR(ENOSYS); + } + for (i = 0; i < FF_ARRAY_ELEMS(supported_formats); i++) { if (ctx->sw_format == supported_formats[i]) break; @@ -187,7 +197,7 @@ static int cuda_transfer_data_from(AVHWFramesContext *ctx, AVFrame *dst, CUresult err; int i; - err = cuCtxPushCurrent(device_hwctx->cuda_ctx); + err = priv->api->cuCtxPushCurrent(device_hwctx->cuda_ctx); if (err != CUDA_SUCCESS) return AVERROR_UNKNOWN; @@ -203,14 +213,14 @@ static int cuda_transfer_data_from(AVHWFramesContext *ctx, AVFrame *dst, .Height = src->height >> (i ? 
priv->shift_height : 0), }; - err = cuMemcpy2D(&cpy); + err = priv->api->cuMemcpy2D(&cpy); if (err != CUDA_SUCCESS) { av_log(ctx, AV_LOG_ERROR, "Error transferring the data from the CUDA frame\n"); return AVERROR_UNKNOWN; } } - cuCtxPopCurrent(&dummy); + priv->api->cuCtxPopCurrent(&dummy); return 0; } @@ -225,7 +235,7 @@ static int cuda_transfer_data_to(AVHWFramesContext *ctx, AVFrame *dst, CUresult err; int i; - err = cuCtxPushCurrent(device_hwctx->cuda_ctx); + err = priv->api->cuCtxPushCurrent(device_hwctx->cuda_ctx); if (err != CUDA_SUCCESS) return AVERROR_UNKNOWN; @@ -241,14 +251,14 @@ static int cuda_transfer_data_to(AVHWFramesContext *ctx, AVFrame *dst, .Height = src->height >> (i ? priv->shift_height : 0), }; - err = cuMemcpy2D(&cpy); + err = priv->api->cuMemcpy2D(&cpy); if (err != CUDA_SUCCESS) { av_log(ctx, AV_LOG_ERROR, "Error transferring the data from the CUDA frame\n"); return AVERROR_UNKNOWN; } } - cuCtxPopCurrent(&dummy); + priv->api->cuCtxPopCurrent(&dummy); return 0; }
_______________________________________________ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org http://ffmpeg.org/mailman/listinfo/ffmpeg-devel