Re: [libav-devel] [PATCH 02/12] lavu: OpenCL hwcontext implementation

2017-06-28 Thread Mark Thompson
On 28/06/17 15:03, wm4 wrote:
> On Wed, 28 Jun 2017 13:36:30 +0100
> Mark Thompson  wrote:
> 
>> On 28/06/17 12:03, wm4 wrote:
>>> On Tue, 27 Jun 2017 22:50:44 +0100
>>> Mark Thompson  wrote:
>>>   
 ---
  configure  |5 +-
  doc/APIchanges |4 +
  libavutil/Makefile |2 +
  libavutil/hwcontext.c  |4 +
  libavutil/hwcontext.h  |1 +
  libavutil/hwcontext_internal.h |1 +
  libavutil/hwcontext_opencl.c   | 1303 
 
  libavutil/hwcontext_opencl.h   |   96 +++
  libavutil/version.h|4 +-
  9 files changed, 1417 insertions(+), 3 deletions(-)
  create mode 100644 libavutil/hwcontext_opencl.c
  create mode 100644 libavutil/hwcontext_opencl.h
 ...
 +/**
 + * OpenCL frame descriptor for pool allocation.
 + *
 + * In user-allocated pools, AVHWFramesContext.pool must return 
 AVBufferRefs
 + * with the data pointer pointing at an object of this type describing the
 + * planes of the frame.
 + */
 +typedef struct AVOpenCLFrameDescriptor {
 +/**
 + * Number of planes in the frame.
 + */
 +int nb_planes;
 +/**
 + * OpenCL image2d objects for each plane of the frame.
 + */
 +cl_mem planes[AV_NUM_DATA_POINTERS];
 +} AVOpenCLFrameDescriptor;  
>>>
>>> Not sure if this should have more metadata about the formats?  
>>
>> I'm not sure what other metadata you want here?  This structure is used as 
>> the buffer reference, and then also to carry the objects for some 
>> mapping cases where that is useful.  It doesn't actually end up in the frame 
>> itself.
> 
> Well, the semantics of those are bound to sw_format, but in the end
> it's all a bit obscure, undocumented, and hidden in the source code.
> 
 +
 +/**
 + * OpenCL device details.
 + *
 + * Allocated as AVHWDeviceContext.hwctx
 + */
 +typedef struct AVOpenCLDeviceContext {
 +/**
 + * The primary device ID of the device.  If multiple OpenCL devices
 + * are associated with the context then this is the one which will
 + * be used for all operations internal to Libav.
 + */
 +cl_device_id device_id;
 +/**
 + * The OpenCL context which will contain all operations and frames on
 + * this device.
 + */
 +cl_context context;
 +/**
 + * The default command queue for this device, which will be used by 
 all
 + * frames contexts which do not have their own command queue.  If not
 + * intialised by the user, a default queue will be created on the
 + * primary device.
 + */
 +cl_command_queue command_queue;
 +} AVOpenCLDeviceContext;  
>>>
>>> Is the default queue also set on the public struct if created by Libav?  
>>
>> Not currently - it stays internal so that it is clear where all of the 
>> references to it are.
>>
>> It could be put here with suitable documentation if you want?
> 
> Sure. Should the command queue be accessible to API users? If not, why
> can the API user _set_ it?

The API user can set it in order to be able to enforce operation ordering in 
the way they want.  Since the API isn't exposing any event interface, you need 
some way to be sure that dependent events (such as writing the contents of the 
frame you are about to download) have completed.  Controlling the command queue 
the transfer is executed on allows you to set such dependencies externally, 
with barriers on a common queue or by enqueuing a wait for events on another 
queue.

You can also do everything synchronously (always call clFinish() to make sure 
kernels have finished running) - then none of that is needed and you don't have 
to touch any of this.

(I have thoughts of allowing the opposite case as well, so that transfer 
operations don't need to wait for completion internally.  It would require 
adding a new flag something like AV_HWFRAME_TRANSFER_ASYNCHRONOUS, though, so I 
haven't yet pursued it.)

___
libav-devel mailing list
libav-devel@libav.org
https://lists.libav.org/mailman/listinfo/libav-devel

Re: [libav-devel] [PATCH 02/12] lavu: OpenCL hwcontext implementation

2017-06-28 Thread wm4
On Wed, 28 Jun 2017 13:36:30 +0100
Mark Thompson  wrote:

> On 28/06/17 12:03, wm4 wrote:
> > On Tue, 27 Jun 2017 22:50:44 +0100
> > Mark Thompson  wrote:
> >   
> >> ---
> >>  configure  |5 +-
> >>  doc/APIchanges |4 +
> >>  libavutil/Makefile |2 +
> >>  libavutil/hwcontext.c  |4 +
> >>  libavutil/hwcontext.h  |1 +
> >>  libavutil/hwcontext_internal.h |1 +
> >>  libavutil/hwcontext_opencl.c   | 1303 
> >> 
> >>  libavutil/hwcontext_opencl.h   |   96 +++
> >>  libavutil/version.h|4 +-
> >>  9 files changed, 1417 insertions(+), 3 deletions(-)
> >>  create mode 100644 libavutil/hwcontext_opencl.c
> >>  create mode 100644 libavutil/hwcontext_opencl.h
> >> ...
> >> +/**
> >> + * OpenCL frame descriptor for pool allocation.
> >> + *
> >> + * In user-allocated pools, AVHWFramesContext.pool must return 
> >> AVBufferRefs
> >> + * with the data pointer pointing at an object of this type describing the
> >> + * planes of the frame.
> >> + */
> >> +typedef struct AVOpenCLFrameDescriptor {
> >> +/**
> >> + * Number of planes in the frame.
> >> + */
> >> +int nb_planes;
> >> +/**
> >> + * OpenCL image2d objects for each plane of the frame.
> >> + */
> >> +cl_mem planes[AV_NUM_DATA_POINTERS];
> >> +} AVOpenCLFrameDescriptor;  
> > 
> > Not sure if this should have more metadata about the formats?  
> 
> I'm not sure what other metadata you want here?  This structure is used as 
> the buffer reference, and then also to carry the objects for some 
> mapping cases where that is useful.  It doesn't actually end up in the frame 
> itself.

Well, the semantics of those are bound to sw_format, but in the end
it's all a bit obscure, undocumented, and hidden in the source code.

> >> +
> >> +/**
> >> + * OpenCL device details.
> >> + *
> >> + * Allocated as AVHWDeviceContext.hwctx
> >> + */
> >> +typedef struct AVOpenCLDeviceContext {
> >> +/**
> >> + * The primary device ID of the device.  If multiple OpenCL devices
> >> + * are associated with the context then this is the one which will
> >> + * be used for all operations internal to Libav.
> >> + */
> >> +cl_device_id device_id;
> >> +/**
> >> + * The OpenCL context which will contain all operations and frames on
> >> + * this device.
> >> + */
> >> +cl_context context;
> >> +/**
> >> + * The default command queue for this device, which will be used by 
> >> all
> >> + * frames contexts which do not have their own command queue.  If not
> >> + * intialised by the user, a default queue will be created on the
> >> + * primary device.
> >> + */
> >> +cl_command_queue command_queue;
> >> +} AVOpenCLDeviceContext;  
> > 
> > Is the default queue also set on the public struct if created by Libav?  
> 
> Not currently - it stays internal so that it is clear where all of the 
> references to it are.
> 
> It could be put here with suitable documentation if you want?

Sure. Should the command queue be accessible to API users? If not, why
can the API user _set_ it?
___
libav-devel mailing list
libav-devel@libav.org
https://lists.libav.org/mailman/listinfo/libav-devel

Re: [libav-devel] [PATCH 02/12] lavu: OpenCL hwcontext implementation

2017-06-28 Thread Mark Thompson
On 28/06/17 12:03, wm4 wrote:
> On Tue, 27 Jun 2017 22:50:44 +0100
> Mark Thompson  wrote:
> 
>> ---
>>  configure  |5 +-
>>  doc/APIchanges |4 +
>>  libavutil/Makefile |2 +
>>  libavutil/hwcontext.c  |4 +
>>  libavutil/hwcontext.h  |1 +
>>  libavutil/hwcontext_internal.h |1 +
>>  libavutil/hwcontext_opencl.c   | 1303 
>> 
>>  libavutil/hwcontext_opencl.h   |   96 +++
>>  libavutil/version.h|4 +-
>>  9 files changed, 1417 insertions(+), 3 deletions(-)
>>  create mode 100644 libavutil/hwcontext_opencl.c
>>  create mode 100644 libavutil/hwcontext_opencl.h
>> ...
>> +/**
>> + * OpenCL frame descriptor for pool allocation.
>> + *
>> + * In user-allocated pools, AVHWFramesContext.pool must return AVBufferRefs
>> + * with the data pointer pointing at an object of this type describing the
>> + * planes of the frame.
>> + */
>> +typedef struct AVOpenCLFrameDescriptor {
>> +/**
>> + * Number of planes in the frame.
>> + */
>> +int nb_planes;
>> +/**
>> + * OpenCL image2d objects for each plane of the frame.
>> + */
>> +cl_mem planes[AV_NUM_DATA_POINTERS];
>> +} AVOpenCLFrameDescriptor;
> 
> Not sure if this should have more metadata about the formats?

I'm not sure what other metadata you want here?  This structure is used as the 
buffer reference, and then also to carry the objects for some mapping 
cases where that is useful.  It doesn't actually end up in the frame itself.

>> +
>> +/**
>> + * OpenCL device details.
>> + *
>> + * Allocated as AVHWDeviceContext.hwctx
>> + */
>> +typedef struct AVOpenCLDeviceContext {
>> +/**
>> + * The primary device ID of the device.  If multiple OpenCL devices
>> + * are associated with the context then this is the one which will
>> + * be used for all operations internal to Libav.
>> + */
>> +cl_device_id device_id;
>> +/**
>> + * The OpenCL context which will contain all operations and frames on
>> + * this device.
>> + */
>> +cl_context context;
>> +/**
>> + * The default command queue for this device, which will be used by all
>> + * frames contexts which do not have their own command queue.  If not
>> + * initialised by the user, a default queue will be created on the
>> + * primary device.
>> + */
>> +cl_command_queue command_queue;
>> +} AVOpenCLDeviceContext;
> 
> Is the default queue also set on the public struct if created by Libav?

Not currently - it stays internal so that it is clear where all of the 
references to it are.

It could be put here with suitable documentation if you want?

>> +
>> +/**
>> + * OpenCL-specific data associated with a frame pool.
>> + *
>> + * Allocated as AVHWFramesContext.hwctx.
>> + */
>> +typedef struct AVOpenCLFramesContext {
>> +/**
>> + * The command queue used for internal asynchronous operations on this
>> + * device (av_hwframe_transfer_data(), av_hwframe_map()).
>> + *
>> + * If this is not set, the command queue from the associated device is
>> + * used instead.
>> + */
>> +cl_command_queue command_queue;
>> +} AVOpenCLFramesContext;
> 
> Same question.

Same answer.

Thanks,

- Mark

___
libav-devel mailing list
libav-devel@libav.org
https://lists.libav.org/mailman/listinfo/libav-devel

Re: [libav-devel] [PATCH 02/12] lavu: OpenCL hwcontext implementation

2017-06-28 Thread wm4
On Tue, 27 Jun 2017 22:50:44 +0100
Mark Thompson  wrote:

> ---
>  configure  |5 +-
>  doc/APIchanges |4 +
>  libavutil/Makefile |2 +
>  libavutil/hwcontext.c  |4 +
>  libavutil/hwcontext.h  |1 +
>  libavutil/hwcontext_internal.h |1 +
>  libavutil/hwcontext_opencl.c   | 1303 
> 
>  libavutil/hwcontext_opencl.h   |   96 +++
>  libavutil/version.h|4 +-
>  9 files changed, 1417 insertions(+), 3 deletions(-)
>  create mode 100644 libavutil/hwcontext_opencl.c
>  create mode 100644 libavutil/hwcontext_opencl.h


> +static int opencl_get_plane_format(enum AVPixelFormat pixfmt,
> +   int plane, int width, int height,
> +   cl_image_format *image_format,
> +   cl_image_desc *image_desc)
> +{
> +const AVPixFmtDescriptor *desc;
> +const AVComponentDescriptor *comp;
> +int channels = 0, order = 0, depth = 0, step = 0;
> +int wsub, hsub, alpha;
> +int c;
> +
> +if (plane >= AV_NUM_DATA_POINTERS)
> +return AVERROR(ENOENT);
> +
> +desc = av_pix_fmt_desc_get(pixfmt);
> +
> +// Only normal images are allowed.
> +if (desc->flags & (AV_PIX_FMT_FLAG_BITSTREAM |
> +   AV_PIX_FMT_FLAG_HWACCEL   |
> +   AV_PIX_FMT_FLAG_PAL))
> +return AVERROR(EINVAL);
> +
> +wsub = 1 << desc->log2_chroma_w;
> +hsub = 1 << desc->log2_chroma_h;
> +// Subsampled components must be exact.
> +if (width & wsub - 1 || height & hsub - 1)
> +return AVERROR(EINVAL);
> +
> +for (c = 0; c < desc->nb_components; c++) {
> +comp = &desc->comp[c];
> +if (comp->plane != plane)
> +continue;
> +// The step size must be a power of two.
> +if (comp->step != 1 && comp->step != 2 &&
> +comp->step != 4 && comp->step != 8)
> +return AVERROR(EINVAL);
> +// The bits in each component must be packed in the
> +// most-significant-bits of the relevant bytes.
> +if (comp->shift + comp->depth != 8 &&
> +comp->shift + comp->depth != 16)
> +return AVERROR(EINVAL);
> +// The depth must not vary between components.
> +if (depth && comp->depth != depth)
> +return AVERROR(EINVAL);
> +// If a single data element crosses multiple bytes then
> +// it must match the native endianness.
> +if (comp->depth > 8 &&
> +HAVE_BIGENDIAN == !(desc->flags & AV_PIX_FMT_FLAG_BE))
> +return AVERROR(EINVAL);
> +// A single data element must not contain multiple samples
> +// from the same component.
> +if (step && comp->step != step)
> +return AVERROR(EINVAL);
> +order = order * 10 + c + 1;
> +depth = comp->depth;
> +step  = comp->step;
> +alpha = (desc->flags & AV_PIX_FMT_FLAG_ALPHA &&
> + c == desc->nb_components - 1);
> +++channels;
> +}
> +if (channels == 0)
> +return AVERROR(ENOENT);
> +
> +memset(image_format, 0, sizeof(*image_format));
> +memset(image_desc,   0, sizeof(*image_desc));
> +image_desc->image_type = CL_MEM_OBJECT_IMAGE2D;
> +
> +if (plane == 0 || alpha) {
> +image_desc->image_width = width;
> +image_desc->image_height= height;
> +image_desc->image_row_pitch = step * width;
> +} else {
> +image_desc->image_width = width  / wsub;
> +image_desc->image_height= height / hsub;
> +image_desc->image_row_pitch = step * width / wsub;
> +}
> +
> +if (depth <= 8) {
> +image_format->image_channel_data_type = CL_UNORM_INT8;
> +} else {
> +if (depth <= 16)
> +image_format->image_channel_data_type = CL_UNORM_INT16;
> +else
> +return AVERROR(EINVAL);
> +}
> +
> +#define CHANNEL_ORDER(order, type) \
> +case order: image_format->image_channel_order = type; break;
> +switch (order) {
> +CHANNEL_ORDER(1,CL_R);
> +CHANNEL_ORDER(2,CL_R);
> +CHANNEL_ORDER(3,CL_R);
> +CHANNEL_ORDER(4,CL_R);
> +CHANNEL_ORDER(12,   CL_RG);
> +CHANNEL_ORDER(23,   CL_RG);
> +CHANNEL_ORDER(1234, CL_RGBA);
> +CHANNEL_ORDER(3214, CL_BGRA);
> +CHANNEL_ORDER(4123, CL_ARGB);
> +#ifdef CL_ABGR
> +CHANNEL_ORDER(4321, CL_ABGR);
> +#endif
> +default:
> +return AVERROR(EINVAL);
> +}
> +#undef CHANNEL_ORDER
> +
> +return 0;
> +}

I suggest we make a generic helper for this.  I "often" need to know
about component order and whether formats are byte-aligned too. The
pixdesc struct is so generic yet insufficient that this can be quite
tricky and complex.


> +/**
> + * OpenCL frame descriptor for pool allocation.
> + 

[libav-devel] [PATCH 02/12] lavu: OpenCL hwcontext implementation

2017-06-27 Thread Mark Thompson
---
 configure  |5 +-
 doc/APIchanges |4 +
 libavutil/Makefile |2 +
 libavutil/hwcontext.c  |4 +
 libavutil/hwcontext.h  |1 +
 libavutil/hwcontext_internal.h |1 +
 libavutil/hwcontext_opencl.c   | 1303 
 libavutil/hwcontext_opencl.h   |   96 +++
 libavutil/version.h|4 +-
 9 files changed, 1417 insertions(+), 3 deletions(-)
 create mode 100644 libavutil/hwcontext_opencl.c
 create mode 100644 libavutil/hwcontext_opencl.h

diff --git a/configure b/configure
index 96bc5ab55..64ea9d007 100755
--- a/configure
+++ b/configure
@@ -245,6 +245,7 @@ External library support:
   --enable-nvenc   Nvidia video encoding
   --enable-omx OpenMAX IL
   --enable-omx-rpi OpenMAX IL for Raspberry Pi
+  --enable-opencl  OpenCL processing
   --enable-vaapi   Video Acceleration API (mainly Unix/Intel)
   --enable-vda Apple Video Decode Acceleration [auto]
   --enable-vdpau   Nvidia Video Decode and Presentation API for Unix [auto]
@@ -1276,6 +1277,7 @@ HWACCEL_LIBRARY_LIST="
 mmal
 nvenc
 omx
+opencl
 vaapi
 vda
 vdpau
@@ -2553,7 +2555,7 @@ avdevice_extralibs="libm_extralibs"
 avformat_extralibs="libm_extralibs"
 avfilter_extralibs="pthreads_extralibs libm_extralibs"
 avresample_extralibs="libm_extralibs"
-avutil_extralibs="clock_gettime_extralibs cuda_extralibs libm_extralibs 
libmfx_extralibs nanosleep_extralibs pthreads_extralibs user32_extralibs 
vaapi_extralibs vaapi_drm_extralibs vaapi_x11_extralibs vdpau_x11_extralibs 
wincrypt_extralibs"
+avutil_extralibs="clock_gettime_extralibs cuda_extralibs libm_extralibs 
libmfx_extralibs nanosleep_extralibs opencl_extralibs pthreads_extralibs 
user32_extralibs vaapi_extralibs vaapi_drm_extralibs vaapi_x11_extralibs 
vdpau_x11_extralibs wincrypt_extralibs"
 swscale_extralibs="libm_extralibs"
 
 # programs
@@ -4818,6 +4820,7 @@ enabled omx   && require_header OMX_Core.h
 enabled omx_rpi   && { check_header OMX_Core.h ||
{ ! enabled cross_compile && add_cflags 
-isystem/opt/vc/include/IL && check_header OMX_Core.h ; } ||
die "ERROR: OpenMAX IL headers not found"; } && 
enable omx
+enabled opencl&& require opencl CL/cl.h clGetPlatformIDs -lOpenCL
 enabled openssl   && { { check_pkg_config openssl openssl 
openssl/ssl.h OPENSSL_init_ssl ||
  check_pkg_config openssl openssl 
openssl/ssl.h SSL_library_init; } ||
check_lib openssl openssl/ssl.h 
SSL_library_init -lssl -lcrypto ||
diff --git a/doc/APIchanges b/doc/APIchanges
index 764449bfe..0da8aa899 100644
--- a/doc/APIchanges
+++ b/doc/APIchanges
@@ -13,6 +13,10 @@ libavutil: 2017-03-23
 
 API changes, most recent first:
 
+2017-06-xx - xxx - lavu 55.3.0 - hwcontext.h hwcontext_opencl.h
+  Add AV_HWDEVICE_TYPE_OPENCL and a new installed header with
+  OpenCL-specific hwcontext definitions.
+
 2017-06-xx - xxx - lavu 56.2.1 - pixfmt.h
   Add AV_PIX_FMT_OPENCL.
 
diff --git a/libavutil/Makefile b/libavutil/Makefile
index 6fb24db67..2dcc89a04 100644
--- a/libavutil/Makefile
+++ b/libavutil/Makefile
@@ -117,6 +117,7 @@ OBJS-$(CONFIG_D3D11VA)  += 
hwcontext_d3d11va.o
 OBJS-$(CONFIG_DXVA2)+= hwcontext_dxva2.o
 OBJS-$(CONFIG_LIBMFX)   += hwcontext_qsv.o
 OBJS-$(CONFIG_LZO)  += lzo.o
+OBJS-$(CONFIG_OPENCL)   += hwcontext_opencl.o
 OBJS-$(CONFIG_VAAPI)+= hwcontext_vaapi.o
 OBJS-$(CONFIG_VDPAU)+= hwcontext_vdpau.o
 
@@ -126,6 +127,7 @@ SKIPHEADERS-$(CONFIG_CUDA) += hwcontext_cuda.h
 SKIPHEADERS-$(CONFIG_D3D11VA)  += hwcontext_d3d11va.h
 SKIPHEADERS-$(CONFIG_DXVA2)+= hwcontext_dxva2.h
 SKIPHEADERS-$(CONFIG_LIBMFX)   += hwcontext_qsv.h
+SKIPHEADERS-$(CONFIG_OPENCL)   += hwcontext_opencl.h
 SKIPHEADERS-$(CONFIG_VAAPI)+= hwcontext_vaapi.h
 SKIPHEADERS-$(CONFIG_VDPAU)+= hwcontext_vdpau.h
 
diff --git a/libavutil/hwcontext.c b/libavutil/hwcontext.c
index 6dc95bba1..fccfda5ef 100644
--- a/libavutil/hwcontext.c
+++ b/libavutil/hwcontext.c
@@ -41,6 +41,9 @@ static const HWContextType * const hw_table[] = {
 #if CONFIG_LIBMFX
 &ff_hwcontext_type_qsv,
 #endif
+#if CONFIG_OPENCL
+&ff_hwcontext_type_opencl,
+#endif
 #if CONFIG_VAAPI
 &ff_hwcontext_type_vaapi,
 #endif
@@ -55,6 +58,7 @@ static const char *const hw_type_names[] = {
 [AV_HWDEVICE_TYPE_DXVA2]  = "dxva2",
 [AV_HWDEVICE_TYPE_D3D11VA] = "d3d11va",
 [AV_HWDEVICE_TYPE_QSV]= "qsv",
+[AV_HWDEVICE_TYPE_OPENCL] = "opencl",
 [AV_HWDEVICE_TYPE_VAAPI]  = "vaapi",
 [AV_HWDEVICE_TYPE_VDPAU]  = "vdpau",
 };
diff --git a/libavutil/hwcontext.h b/libavutil/hwcontext.h
index 203ea510e..80a619cc8 100644
--- a/libavutil/hwcontext.h
+++