Re: [FFmpeg-devel] [PATCH 2/2] nvenc: support d3d11 surface input

2017-11-15 Thread Hendrik Leppkes
On Sun, Nov 12, 2017 at 3:32 PM, Timo Rothenpieler wrote:
> Am 12.11.2017 um 10:30 schrieb Hendrik Leppkes:
>>
>> ---
>>   libavcodec/nvenc.c | 106 ++---
>>   libavcodec/nvenc.h |  11 +-
>>   2 files changed, 95 insertions(+), 22 deletions(-)
>>
>
> Don't have a setup to test D3D11VA with right now, but it does not break
> stuff on Linux and Cygwin. So if it works for you, that's LGTM as well.
>

Set pushed.
___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


Re: [FFmpeg-devel] [PATCH 2/2] nvenc: support d3d11 surface input

2017-11-12 Thread Timo Rothenpieler

Am 12.11.2017 um 10:30 schrieb Hendrik Leppkes:

---
  libavcodec/nvenc.c | 106 ++---
  libavcodec/nvenc.h |  11 +-
  2 files changed, 95 insertions(+), 22 deletions(-)



Don't have a setup to test D3D11VA with right now, but it does not break 
stuff on Linux and Cygwin. So if it works for you, that's LGTM as well.




smime.p7s
Description: S/MIME Cryptographic Signature
___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


Re: [FFmpeg-devel] [PATCH 2/2] nvenc: support d3d11 surface input

2017-11-12 Thread Hendrik Leppkes
On Sun, Nov 12, 2017 at 10:30 AM, Hendrik Leppkes wrote:
> ---
>  libavcodec/nvenc.c | 106 ++---
>  libavcodec/nvenc.h |  11 +-
>  2 files changed, 95 insertions(+), 22 deletions(-)
>

Some details, if anyone cares (tested with 1080p)

On an idle system, the performance of d3d11 decode -> hwdownload -> nvenc is
largely similar to that of a direct d3d11 -> nvenc connection. However, once
the GPU is busy (I was testing with a 3D rendering going on last
night), hwdownload quickly degrades in performance, while the direct
connection didn't suffer at all.
Additionally, a sharp drop in CPU usage can be observed.

In case anyone is interested, I plan on working on a D3D11 VPP filter
in the future to allow scaling and deinterlacing in this particular
workflow, using D3D11 Video Processor APIs.

- Hendrik
___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


[FFmpeg-devel] [PATCH 2/2] nvenc: support d3d11 surface input

2017-11-12 Thread Hendrik Leppkes
---
 libavcodec/nvenc.c | 106 ++---
 libavcodec/nvenc.h |  11 +-
 2 files changed, 95 insertions(+), 22 deletions(-)

diff --git a/libavcodec/nvenc.c b/libavcodec/nvenc.c
index c685d973c1..eba59634f6 100644
--- a/libavcodec/nvenc.c
+++ b/libavcodec/nvenc.c
@@ -45,6 +45,9 @@ const enum AVPixelFormat ff_nvenc_pix_fmts[] = {
 AV_PIX_FMT_0RGB32,
 AV_PIX_FMT_0BGR32,
 AV_PIX_FMT_CUDA,
+#if CONFIG_D3D11VA
+AV_PIX_FMT_D3D11,
+#endif
 AV_PIX_FMT_NONE
 };
 
@@ -172,6 +175,9 @@ static int nvenc_push_context(AVCodecContext *avctx)
 NvencDynLoadFunctions *dl_fn = &ctx->nvenc_dload_funcs;
 CUresult cu_res;
 
+if (ctx->d3d11_device)
+return 0;
+
 cu_res = dl_fn->cuda_dl->cuCtxPushCurrent(ctx->cu_context);
 if (cu_res != CUDA_SUCCESS) {
 av_log(avctx, AV_LOG_ERROR, "cuCtxPushCurrent failed\n");
@@ -188,6 +194,9 @@ static int nvenc_pop_context(AVCodecContext *avctx)
 CUresult cu_res;
 CUcontext dummy;
 
+if (ctx->d3d11_device)
+return 0;
+
 cu_res = dl_fn->cuda_dl->cuCtxPopCurrent(&dummy);
 if (cu_res != CUDA_SUCCESS) {
 av_log(avctx, AV_LOG_ERROR, "cuCtxPopCurrent failed\n");
@@ -206,8 +215,16 @@ static av_cold int nvenc_open_session(AVCodecContext *avctx)
 
 params.version= NV_ENC_OPEN_ENCODE_SESSION_EX_PARAMS_VER;
 params.apiVersion = NVENCAPI_VERSION;
-params.device = ctx->cu_context;
-params.deviceType = NV_ENC_DEVICE_TYPE_CUDA;
+if (ctx->d3d11_device)
+{
+params.device = ctx->d3d11_device;
+params.deviceType = NV_ENC_DEVICE_TYPE_DIRECTX;
+}
+else
+{
+params.device = ctx->cu_context;
+params.deviceType = NV_ENC_DEVICE_TYPE_CUDA;
+}
 
 ret = p_nvenc->nvEncOpenEncodeSessionEx(&params, &ctx->nvencoder);
 if (ret != NV_ENC_SUCCESS) {
@@ -458,23 +475,48 @@ static av_cold int nvenc_setup_device(AVCodecContext *avctx)
 return AVERROR_BUG;
 }
 
-if (avctx->pix_fmt == AV_PIX_FMT_CUDA || avctx->hw_frames_ctx || avctx->hw_device_ctx) {
+if (avctx->pix_fmt == AV_PIX_FMT_CUDA || avctx->pix_fmt == AV_PIX_FMT_D3D11 || avctx->hw_frames_ctx || avctx->hw_device_ctx) {
 AVHWFramesContext   *frames_ctx;
 AVHWDeviceContext   *hwdev_ctx;
-AVCUDADeviceContext *device_hwctx;
+AVCUDADeviceContext *cuda_device_hwctx = NULL;
+#if CONFIG_D3D11VA
+AVD3D11VADeviceContext *d3d11_device_hwctx = NULL;
+#endif
 int ret;
 
 if (avctx->hw_frames_ctx) {
 frames_ctx = (AVHWFramesContext*)avctx->hw_frames_ctx->data;
-device_hwctx = frames_ctx->device_ctx->hwctx;
+if (frames_ctx->format == AV_PIX_FMT_CUDA)
+cuda_device_hwctx = frames_ctx->device_ctx->hwctx;
+#if CONFIG_D3D11VA
+else if (frames_ctx->format == AV_PIX_FMT_D3D11)
+d3d11_device_hwctx = frames_ctx->device_ctx->hwctx;
+#endif
+else
+return AVERROR(EINVAL);
 } else if (avctx->hw_device_ctx) {
 hwdev_ctx = (AVHWDeviceContext*)avctx->hw_device_ctx->data;
-device_hwctx = hwdev_ctx->hwctx;
+if (hwdev_ctx->type == AV_HWDEVICE_TYPE_CUDA)
+cuda_device_hwctx = hwdev_ctx->hwctx;
+#if CONFIG_D3D11VA
+else if (hwdev_ctx->type == AV_HWDEVICE_TYPE_D3D11VA)
+d3d11_device_hwctx = hwdev_ctx->hwctx;
+#endif
+else
+return AVERROR(EINVAL);
 } else {
 return AVERROR(EINVAL);
 }
 
-ctx->cu_context = device_hwctx->cuda_ctx;
+if (cuda_device_hwctx) {
+ctx->cu_context = cuda_device_hwctx->cuda_ctx;
+}
+#if CONFIG_D3D11VA
+else if (d3d11_device_hwctx) {
+ctx->d3d11_device = d3d11_device_hwctx->device;
+ID3D11Device_AddRef(ctx->d3d11_device);
+}
+#endif
 
 ret = nvenc_open_session(avctx);
 if (ret < 0)
@@ -1205,7 +1247,7 @@ static av_cold int nvenc_alloc_surface(AVCodecContext *avctx, int idx)
 NV_ENC_CREATE_BITSTREAM_BUFFER allocOut = { 0 };
 allocOut.version = NV_ENC_CREATE_BITSTREAM_BUFFER_VER;
 
-if (avctx->pix_fmt == AV_PIX_FMT_CUDA) {
+if (avctx->pix_fmt == AV_PIX_FMT_CUDA || avctx->pix_fmt == AV_PIX_FMT_D3D11) {
 ctx->surfaces[idx].in_ref = av_frame_alloc();
 if (!ctx->surfaces[idx].in_ref)
 return AVERROR(ENOMEM);
@@ -1237,7 +1279,7 @@ static av_cold int nvenc_alloc_surface(AVCodecContext *avctx, int idx)
 nv_status = p_nvenc->nvEncCreateBitstreamBuffer(ctx->nvencoder, &allocOut);
 if (nv_status != NV_ENC_SUCCESS) {
 int err = nvenc_print_error(avctx, nv_status, "CreateBitstreamBuffer failed");
-if (avctx->pix_fmt != AV_PIX_FMT_CUDA)
+if (avctx->pix_fmt != AV_PIX_FMT_CUDA && avctx->pix_fmt != AV_PIX_FMT_D3D11)
 p_nvenc->nvEncDestroyInputBuffer(ctx->nvencoder, ctx->surfaces[idx].input_surface);