Re: [PATCH v2] drm/vkms: fix 32bit compilation error by replacing macros
Reviewed-by: Igor Torrente On 9/10/22 16:03, Melissa Wen wrote: Replace vkms_formats macro for fixed-point operations with functions from drm/drm_fixed.h to do the same job and fix 32-bit compilation errors. v2: - don't cast results to s32 (Igor) - add missing drm_fixp2int conversion (Igor) Fixes: a19c2ac9858 ("drm: vkms: Add support to the RGB565 format") Tested-by: Sudip Mukherjee (v1) Reviewed-by: Alex Deucher (v1) Reported-by: Sudip Mukherjee Reported-by: kernel test robot Signed-off-by: Melissa Wen --- drivers/gpu/drm/vkms/vkms_formats.c | 53 +++-- 1 file changed, 19 insertions(+), 34 deletions(-) diff --git a/drivers/gpu/drm/vkms/vkms_formats.c b/drivers/gpu/drm/vkms/vkms_formats.c index 300abb4d1dfe..d4950688b3f1 100644 --- a/drivers/gpu/drm/vkms/vkms_formats.c +++ b/drivers/gpu/drm/vkms/vkms_formats.c @@ -1,27 +1,12 @@ // SPDX-License-Identifier: GPL-2.0+ -#include +#include #include +#include +#include #include "vkms_formats.h" -/* The following macros help doing fixed point arithmetic. */ -/* - * With Fixed-Point scale 15 we have 17 and 15 bits of integer and fractional - * parts respectively. - * | 0.000 | - * 31 0 - */ -#define SHIFT 15 - -#define INT_TO_FIXED(a) ((a) << SHIFT) -#define FIXED_MUL(a, b) ((s32)(((s64)(a) * (b)) >> SHIFT)) -#define FIXED_DIV(a, b) ((s32)(((s64)(a) << SHIFT) / (b))) -/* This macro converts a fixed point number to int, and round half up it */ -#define FIXED_TO_INT_ROUND(a) (((a) + (1 << (SHIFT - 1))) >> SHIFT) -#define INT_TO_FIXED_DIV(a, b) (FIXED_DIV(INT_TO_FIXED(a), INT_TO_FIXED(b))) -#define INT_TO_FIXED_DIV(a, b) (FIXED_DIV(INT_TO_FIXED(a), INT_TO_FIXED(b))) - static size_t pixel_offset(const struct vkms_frame_info *frame_info, int x, int y) { return frame_info->offset + (y * frame_info->pitch) @@ -137,19 +122,19 @@ static void RGB565_to_argb_u16(struct line_buffer *stage_buffer, int x_limit = min_t(size_t, drm_rect_width(&frame_info->dst), stage_buffer->n_pixels); - s32 fp_rb_ratio = INT_TO_FIXED_DIV(65535, 31); - s32 fp_g_ratio = INT_TO_FIXED_DIV(65535, 63); + s64 fp_rb_ratio = drm_fixp_div(drm_int2fixp(65535), drm_int2fixp(31)); + s64 fp_g_ratio = drm_fixp_div(drm_int2fixp(65535), drm_int2fixp(63)); for (size_t x = 0; x < x_limit; x++, src_pixels++) { u16 rgb_565 = le16_to_cpu(*src_pixels); - s32 fp_r = INT_TO_FIXED((rgb_565 >> 11) & 0x1f); - s32 fp_g = INT_TO_FIXED((rgb_565 >> 5) & 0x3f); - s32 fp_b = INT_TO_FIXED(rgb_565 & 0x1f); + s64 fp_r = drm_int2fixp((rgb_565 >> 11) & 0x1f); + s64 fp_g = drm_int2fixp((rgb_565 >> 5) & 0x3f); + s64 fp_b = drm_int2fixp(rgb_565 & 0x1f); out_pixels[x].a = (u16)0x; - out_pixels[x].r = FIXED_TO_INT_ROUND(FIXED_MUL(fp_r, fp_rb_ratio)); - out_pixels[x].g = FIXED_TO_INT_ROUND(FIXED_MUL(fp_g, fp_g_ratio)); - out_pixels[x].b = FIXED_TO_INT_ROUND(FIXED_MUL(fp_b, fp_rb_ratio)); + out_pixels[x].r = drm_fixp2int(drm_fixp_mul(fp_r, fp_rb_ratio)); + out_pixels[x].g = drm_fixp2int(drm_fixp_mul(fp_g, fp_g_ratio)); + out_pixels[x].b = drm_fixp2int(drm_fixp_mul(fp_b, fp_rb_ratio)); } } @@ -248,17 +233,17 @@ static void argb_u16_to_RGB565(struct vkms_frame_info *frame_info, int x_limit = min_t(size_t, drm_rect_width(&frame_info->dst), src_buffer->n_pixels); - s32 fp_rb_ratio = INT_TO_FIXED_DIV(65535, 31); - s32 fp_g_ratio = INT_TO_FIXED_DIV(65535, 63); + s64 fp_rb_ratio = drm_fixp_div(drm_int2fixp(65535), drm_int2fixp(31)); + s64 fp_g_ratio = drm_fixp_div(drm_int2fixp(65535), drm_int2fixp(63)); for (size_t x = 0; x < x_limit; x++, dst_pixels++) { - s32 fp_r = INT_TO_FIXED(in_pixels[x].r); - s32 fp_g = INT_TO_FIXED(in_pixels[x].g); - s32 fp_b = INT_TO_FIXED(in_pixels[x].b); + s64 fp_r = drm_int2fixp(in_pixels[x].r); + s64 fp_g = drm_int2fixp(in_pixels[x].g); + s64 fp_b = drm_int2fixp(in_pixels[x].b); - u16 r = FIXED_TO_INT_ROUND(FIXED_DIV(fp_r, fp_rb_ratio)); - u16 g = FIXED_TO_INT_ROUND(FIXED_DIV(fp_g, fp_g_ratio)); - u16 b = FIXED_TO_INT_ROUND(FIXED_DIV(fp_b, fp_rb_ratio)); + u16 r = drm_fixp2int(drm_fixp_div(fp_r, fp_rb_ratio)); + u16 g = drm_fixp2int(drm_fixp_div(fp_g, fp_g_ratio)); + u16 b = drm_fixp2int(drm_fixp_div(fp_b, fp_rb_ratio)); *dst_pixels = cpu_to_le16(r << 11 | g << 5 | b); }
Re: [PATCH] drm/vkms: fix 32bit compilation error by replacing macros
On 9/10/22 16:10, Melissa Wen wrote: On 09/09, Igor Matheus Andrade Torrente wrote: Hi Mellisa, Thanks for the patch fixing my mistakes. On 9/9/22 08:41, Melissa Wen wrote: Replace vkms_formats macros for fixed-point operations with functions from drm/drm_fixed.h to do the same job and fix 32-bit compilation errors. Fixes: a19c2ac9858 ("drm: vkms: Add support to the RGB565 format") Tested-by: Sudip Mukherjee Reported-by: Sudip Mukherjee Reported-by: kernel test robot Signed-off-by: Melissa Wen --- drivers/gpu/drm/vkms/vkms_formats.c | 53 +++-- 1 file changed, 19 insertions(+), 34 deletions(-) diff --git a/drivers/gpu/drm/vkms/vkms_formats.c b/drivers/gpu/drm/vkms/vkms_formats.c index 300abb4d1dfe..ddcd3cfeeaac 100644 --- a/drivers/gpu/drm/vkms/vkms_formats.c +++ b/drivers/gpu/drm/vkms/vkms_formats.c @@ -1,27 +1,12 @@ // SPDX-License-Identifier: GPL-2.0+ -#include +#include #include +#include +#include #include "vkms_formats.h" -/* The following macros help doing fixed point arithmetic. */ -/* - * With Fixed-Point scale 15 we have 17 and 15 bits of integer and fractional - * parts respectively. - * | 0.000 | - * 31 0 - */ -#define SHIFT 15 - -#define INT_TO_FIXED(a) ((a) << SHIFT) -#define FIXED_MUL(a, b) ((s32)(((s64)(a) * (b)) >> SHIFT)) -#define FIXED_DIV(a, b) ((s32)(((s64)(a) << SHIFT) / (b))) -/* This macro converts a fixed point number to int, and round half up it */ -#define FIXED_TO_INT_ROUND(a) (((a) + (1 << (SHIFT - 1))) >> SHIFT) -#define INT_TO_FIXED_DIV(a, b) (FIXED_DIV(INT_TO_FIXED(a), INT_TO_FIXED(b))) -#define INT_TO_FIXED_DIV(a, b) (FIXED_DIV(INT_TO_FIXED(a), INT_TO_FIXED(b))) - static size_t pixel_offset(const struct vkms_frame_info *frame_info, int x, int y) { return frame_info->offset + (y * frame_info->pitch) @@ -137,19 +122,19 @@ static void RGB565_to_argb_u16(struct line_buffer *stage_buffer, int x_limit = min_t(size_t, drm_rect_width(&frame_info->dst), stage_buffer->n_pixels); - s32 fp_rb_ratio = INT_TO_FIXED_DIV(65535, 31); - s32 fp_g_ratio = INT_TO_FIXED_DIV(65535, 63); + s32 fp_rb_ratio = drm_fixp_div(drm_int2fixp(65535), 31); + s32 fp_g_ratio = drm_fixp_div(drm_int2fixp(65535), 63); I think you need to add `drm_int2fixp` to 31 and 63. for (size_t x = 0; x < x_limit; x++, src_pixels++) { u16 rgb_565 = le16_to_cpu(*src_pixels); - s32 fp_r = INT_TO_FIXED((rgb_565 >> 11) & 0x1f); - s32 fp_g = INT_TO_FIXED((rgb_565 >> 5) & 0x3f); - s32 fp_b = INT_TO_FIXED(rgb_565 & 0x1f); + s32 fp_r = drm_int2fixp((rgb_565 >> 11) & 0x1f); + s32 fp_g = drm_int2fixp((rgb_565 >> 5) & 0x3f); + s32 fp_b = drm_int2fixp(rgb_565 & 0x1f); And we are cast implicitly from 64 bits int to 32 bits which is implementation-defined AFAIK. So, probably we should be using `s64` for all of these variables. I tested the patch. And I'm seeing some differences in the intermediate results. From my testing, these changes solve those differences. Hi Igor, Thanks for checking the calc results and all inputs provided. I just sent a second version, can you take a look? I replicated your suggestions for RGB565_to_argb_u16() in argb_u16_to_RGB565() and double-checked for i386 and arm. Let me know what yexiuou think. I tested it here and everything seem to be working :). Another thing that may have an impact on the final output is the lack of rounding in drm_fixed.h. This can potentially produce the wrong result. Yeah, I see... I can include a comment about the rounding issue for further improvements, or do you plan to work on it? A comment would be good. Maybe add to the VKMS TODO list? I'm busy with other stuffs, and can't work on this any time soon. But It's pretty simple thing to implement. Thanks, Melissa Thanks, --- Igor Torrente out_pixels[x].a = (u16)0x; - out_pixels[x].r = FIXED_TO_INT_ROUND(FIXED_MUL(fp_r, fp_rb_ratio)); - out_pixels[x].g = FIXED_TO_INT_ROUND(FIXED_MUL(fp_g, fp_g_ratio)); - out_pixels[x].b = FIXED_TO_INT_ROUND(FIXED_MUL(fp_b, fp_rb_ratio)); + out_pixels[x].r = drm_fixp2int(drm_fixp_mul(fp_r, fp_rb_ratio)); + out_pixels[x].g = drm_fixp2int(drm_fixp_mul(fp_g, fp_g_ratio)); + out_pixels[x].b = drm_fixp2int(drm_fixp_mul(fp_b, fp_rb_ratio)); } } @@ -248,17 +233,17 @@ static void argb_u16_to_RGB565(struct vkms_frame_info *frame_info, int x_limit = min_t(size_t, drm_rect_width(&frame_info->dst), src_buffer->n_pixels); - s32 fp_rb_ratio = INT_TO_FIXED_DIV(65535,
Re: [PATCH] drm/vkms: fix 32bit compilation error by replacing macros
Hi Mellisa, Thanks for the patch fixing my mistakes. On 9/9/22 08:41, Melissa Wen wrote: Replace vkms_formats macros for fixed-point operations with functions from drm/drm_fixed.h to do the same job and fix 32-bit compilation errors. Fixes: a19c2ac9858 ("drm: vkms: Add support to the RGB565 format") Tested-by: Sudip Mukherjee Reported-by: Sudip Mukherjee Reported-by: kernel test robot Signed-off-by: Melissa Wen --- drivers/gpu/drm/vkms/vkms_formats.c | 53 +++-- 1 file changed, 19 insertions(+), 34 deletions(-) diff --git a/drivers/gpu/drm/vkms/vkms_formats.c b/drivers/gpu/drm/vkms/vkms_formats.c index 300abb4d1dfe..ddcd3cfeeaac 100644 --- a/drivers/gpu/drm/vkms/vkms_formats.c +++ b/drivers/gpu/drm/vkms/vkms_formats.c @@ -1,27 +1,12 @@ // SPDX-License-Identifier: GPL-2.0+ -#include +#include #include +#include +#include #include "vkms_formats.h" -/* The following macros help doing fixed point arithmetic. */ -/* - * With Fixed-Point scale 15 we have 17 and 15 bits of integer and fractional - * parts respectively. - * | 0.000 | - * 31 0 - */ -#define SHIFT 15 - -#define INT_TO_FIXED(a) ((a) << SHIFT) -#define FIXED_MUL(a, b) ((s32)(((s64)(a) * (b)) >> SHIFT)) -#define FIXED_DIV(a, b) ((s32)(((s64)(a) << SHIFT) / (b))) -/* This macro converts a fixed point number to int, and round half up it */ -#define FIXED_TO_INT_ROUND(a) (((a) + (1 << (SHIFT - 1))) >> SHIFT) -#define INT_TO_FIXED_DIV(a, b) (FIXED_DIV(INT_TO_FIXED(a), INT_TO_FIXED(b))) -#define INT_TO_FIXED_DIV(a, b) (FIXED_DIV(INT_TO_FIXED(a), INT_TO_FIXED(b))) - static size_t pixel_offset(const struct vkms_frame_info *frame_info, int x, int y) { return frame_info->offset + (y * frame_info->pitch) @@ -137,19 +122,19 @@ static void RGB565_to_argb_u16(struct line_buffer *stage_buffer, int x_limit = min_t(size_t, drm_rect_width(&frame_info->dst), stage_buffer->n_pixels); - s32 fp_rb_ratio = INT_TO_FIXED_DIV(65535, 31); - s32 fp_g_ratio = INT_TO_FIXED_DIV(65535, 63); + s32 fp_rb_ratio = drm_fixp_div(drm_int2fixp(65535), 31); + s32 fp_g_ratio = drm_fixp_div(drm_int2fixp(65535), 63); I think you need to add `drm_int2fixp` to 31 and 63. for (size_t x = 0; x < x_limit; x++, src_pixels++) { u16 rgb_565 = le16_to_cpu(*src_pixels); - s32 fp_r = INT_TO_FIXED((rgb_565 >> 11) & 0x1f); - s32 fp_g = INT_TO_FIXED((rgb_565 >> 5) & 0x3f); - s32 fp_b = INT_TO_FIXED(rgb_565 & 0x1f); + s32 fp_r = drm_int2fixp((rgb_565 >> 11) & 0x1f); + s32 fp_g = drm_int2fixp((rgb_565 >> 5) & 0x3f); + s32 fp_b = drm_int2fixp(rgb_565 & 0x1f); And we are cast implicitly from 64 bits int to 32 bits which is implementation-defined AFAIK. So, probably we should be using `s64` for all of these variables. I tested the patch. And I'm seeing some differences in the intermediate results. From my testing, these changes solve those differences. Another thing that may have an impact on the final output is the lack of rounding in drm_fixed.h. This can potentially produce the wrong result. Thanks, --- Igor Torrente out_pixels[x].a = (u16)0x; - out_pixels[x].r = FIXED_TO_INT_ROUND(FIXED_MUL(fp_r, fp_rb_ratio)); - out_pixels[x].g = FIXED_TO_INT_ROUND(FIXED_MUL(fp_g, fp_g_ratio)); - out_pixels[x].b = FIXED_TO_INT_ROUND(FIXED_MUL(fp_b, fp_rb_ratio)); + out_pixels[x].r = drm_fixp2int(drm_fixp_mul(fp_r, fp_rb_ratio)); + out_pixels[x].g = drm_fixp2int(drm_fixp_mul(fp_g, fp_g_ratio)); + out_pixels[x].b = drm_fixp2int(drm_fixp_mul(fp_b, fp_rb_ratio)); } } @@ -248,17 +233,17 @@ static void argb_u16_to_RGB565(struct vkms_frame_info *frame_info, int x_limit = min_t(size_t, drm_rect_width(&frame_info->dst), src_buffer->n_pixels); - s32 fp_rb_ratio = INT_TO_FIXED_DIV(65535, 31); - s32 fp_g_ratio = INT_TO_FIXED_DIV(65535, 63); + s32 fp_rb_ratio = drm_fixp_div(drm_int2fixp(65535), 31); + s32 fp_g_ratio = drm_fixp_div(drm_int2fixp(65535), 63); for (size_t x = 0; x < x_limit; x++, dst_pixels++) { - s32 fp_r = INT_TO_FIXED(in_pixels[x].r); - s32 fp_g = INT_TO_FIXED(in_pixels[x].g); - s32 fp_b = INT_TO_FIXED(in_pixels[x].b); + s32 fp_r = drm_int2fixp(in_pixels[x].r); + s32 fp_g = drm_int2fixp(in_pixels[x].g); + s32 fp_b = drm_int2fixp(in_pixels[x].b); - u16 r = FIXED_TO_INT_ROUND(FIXED_DIV(fp_r, fp_rb_ratio)); - u16 g = FIXED_TO_INT_ROUND(FIXED_DIV(fp_g, fp_g_ratio)); - u16 b = FIXED_TO_INT_ROUND(FIXED_DIV(fp_b, fp_rb_ratio)); + u16 r = drm_fixp2int(drm_fixp_div(fp_r, fp_rb_ratio)); +
Re: build failure of next-20220906 due to 396369d67549 ("drm: vkms: Add support to the RGB565 format")
On 9/6/22 18:26, Sudip Mukherjee wrote: On Tue, Sep 6, 2022 at 4:59 PM Sudip Mukherjee (Codethink) wrote: Hi All, The builds of next-20220906 fails for mips, xtensa and arm allmodconfig. The errors in mips and xtensa are: ERROR: modpost: "__divdi3" [drivers/gpu/drm/vkms/vkms.ko] undefined! ERROR: modpost: "__udivdi3" [drivers/gpu/drm/vkms/vkms.ko] undefined! The error in arm is: ERROR: modpost: "__aeabi_uldivmod" [drivers/gpu/drm/vkms/vkms.ko] undefined! ERROR: modpost: "__aeabi_ldivmod" [drivers/gpu/drm/vkms/vkms.ko] undefined! Trying to do a git bisect to find out the offending commit. git bisect points to 396369d67549 ("drm: vkms: Add support to the RGB565 format") Are these architectures incapable of doing 64bits int division?
Re: [RESEND v6 6/9] drm: vkms: Refactor the plane composer to accept new formats
On 8/22/22 16:01, Melissa Wen wrote: On 08/22, Igor Matheus Andrade Torrente wrote: Hi Melissa, On 8/20/22 07:51, Melissa Wen wrote: On 08/19, Igor Torrente wrote: Currently the blend function only accepts XRGB_ and ARGB_ as a color input. This patch refactors all the functions related to the plane composition to overcome this limitation. The pixels blend is done using the new internal format. And new handlers are being added to convert a specific format to/from this internal format. So the blend operation depends on these handlers to convert to this common format. The blended result, if necessary, is converted to the writeback buffer format. This patch introduces three major differences to the blend function. 1 - All the planes are blended at once. 2 - The blend calculus is done as per line instead of per pixel. 3 - It is responsible to calculates the CRC and writing the writeback buffer(if necessary). These changes allow us to allocate way less memory in the intermediate buffer to compute these operations. Because now we don't need to have the entire intermediate image lines at once, just one line is enough. | Memory consumption (output dimensions) | |:--:| | Current | This patch| |:--:|:-:| | Width * Heigth | 2 * Width | Beyond memory, we also have a minor performance benefit from all these changes. Results running the IGT[1] test `igt@kms_cursor_crc@pipe-a-cursor-512x512-onscreen` ten times: | Frametime | |:--:| | Implementation | Current | This commit | |:---:|:-:|::| | frametime range | 9~22 ms |5~17 ms | | Average | 11.4 ms |7.8 ms| [1] IGT commit id: bc3f6833a12221a46659535dac06ebb312490eb4 V2: Improves the performance drastically, by performing the operations per-line and not per-pixel(Pekka Paalanen). Minor improvements(Pekka Paalanen). V3: Changes the code to blend the planes all at once. This improves performance, memory consumption, and removes much of the weirdness of the V2(Pekka Paalanen and me). Minor improvements(Pekka Paalanen and me). V4: Rebase the code and adapt it to the new NUM_OVERLAY_PLANES constant. V5: Minor checkpatch fixes and the removal of TO-DO item(Melissa Wen). Several security/robustness improvents(Pekka Paalanen). Removes check_planes_x_bounds function and allows partial partly off-screen(Pekka Paalanen). V6: Fix a mismatch of some variable sizes (Pekka Paalanen). Several minor improvements (Pekka Paalanen). Reported-by: kernel test robot Signed-off-by: Igor Torrente --- Documentation/gpu/vkms.rst| 4 - drivers/gpu/drm/vkms/Makefile | 1 + drivers/gpu/drm/vkms/vkms_composer.c | 320 -- drivers/gpu/drm/vkms/vkms_formats.c | 155 + drivers/gpu/drm/vkms/vkms_formats.h | 12 + drivers/gpu/drm/vkms/vkms_plane.c | 3 + drivers/gpu/drm/vkms/vkms_writeback.c | 3 + 7 files changed, 317 insertions(+), 181 deletions(-) create mode 100644 drivers/gpu/drm/vkms/vkms_formats.c create mode 100644 drivers/gpu/drm/vkms/vkms_formats.h diff --git a/Documentation/gpu/vkms.rst b/Documentation/gpu/vkms.rst index 973e2d43108b..a49e4ae92653 100644 --- a/Documentation/gpu/vkms.rst +++ b/Documentation/gpu/vkms.rst @@ -118,10 +118,6 @@ Add Plane Features There's lots of plane features we could add support for: -- Clearing primary plane: clear primary plane before plane composition (at the - start) for correctness of pixel blend ops. It also guarantees alpha channel - is cleared in the target buffer for stable crc. [Good to get started] - - ARGB format on primary plane: blend the primary plane into background with translucent alpha. diff --git a/drivers/gpu/drm/vkms/Makefile b/drivers/gpu/drm/vkms/Makefile index 72f779cbfedd..1b28a6a32948 100644 --- a/drivers/gpu/drm/vkms/Makefile +++ b/drivers/gpu/drm/vkms/Makefile @@ -3,6 +3,7 @@ vkms-y := \ vkms_drv.o \ vkms_plane.o \ vkms_output.o \ + vkms_formats.o \ vkms_crtc.o \ vkms_composer.o \ vkms_writeback.o diff --git a/drivers/gpu/drm/vkms/vkms_composer.c b/drivers/gpu/drm/vkms/vkms_composer.c index b9fb408e8973..5b1a8bdd8268 100644 --- a/drivers/gpu/drm/vkms/vkms_composer.c +++ b/drivers/gpu/drm/vkms/vkms_composer.c @@ -7,204 +7,188 @@ #include #include #include +#include #include "vkms_drv.h" -static u32 get_pixel_from_buffer(int x, int y, const u8 *buffer, -const struct vkms_frame_info *frame_info) +static u16 pre_mul_blend_channel(u16 src, u16 dst, u16 alpha) { - u32 pixel; - int src_offset = frame_info->offset + (y * frame_info->pitch) -
Re: [RESEND v6 2/9] drm: vkms: Rename `vkms_composer` to `vkms_frame_info`
On 8/22/22 15:37, Melissa Wen wrote: On 08/22, Igor Matheus Andrade Torrente wrote: Hi Mellisa, On 8/20/22 08:00, Melissa Wen wrote: On 08/19, Igor Torrente wrote: Changes the name of this struct to a more meaningful name. A name that represents better what this struct is about. Composer is the code that do the compositing of the planes. This struct contains information on the frame used in the output composition. Thus, vkms_frame_info is a better name to represent this. V5: Fix a commit message typo(Melissa Wen). Reviewed-by: Melissa Wen Signed-off-by: Igor Torrente --- drivers/gpu/drm/vkms/vkms_composer.c | 87 ++-- drivers/gpu/drm/vkms/vkms_drv.h | 6 +- drivers/gpu/drm/vkms/vkms_plane.c| 38 ++-- 3 files changed, 66 insertions(+), 65 deletions(-) diff --git a/drivers/gpu/drm/vkms/vkms_composer.c b/drivers/gpu/drm/vkms/vkms_composer.c index 775b97766e08..0aded4e87e60 100644 --- a/drivers/gpu/drm/vkms/vkms_composer.c +++ b/drivers/gpu/drm/vkms/vkms_composer.c @@ -11,11 +11,11 @@ #include "vkms_drv.h" static u32 get_pixel_from_buffer(int x, int y, const u8 *buffer, -const struct vkms_composer *composer) +const struct vkms_frame_info *frame_info) { u32 pixel; - int src_offset = composer->offset + (y * composer->pitch) - + (x * composer->cpp); + int src_offset = frame_info->offset + (y * frame_info->pitch) + + (x * frame_info->cpp); pixel = *(u32 *)&buffer[src_offset]; @@ -26,24 +26,24 @@ static u32 get_pixel_from_buffer(int x, int y, const u8 *buffer, * compute_crc - Compute CRC value on output frame * * @vaddr: address to final framebuffer - * @composer: framebuffer's metadata + * @frame_info: framebuffer's metadata * * returns CRC value computed using crc32 on the visible portion of * the final framebuffer at vaddr_out */ static uint32_t compute_crc(const u8 *vaddr, - const struct vkms_composer *composer) + const struct vkms_frame_info *frame_info) { int x, y; u32 crc = 0, pixel = 0; - int x_src = composer->src.x1 >> 16; - int y_src = composer->src.y1 >> 16; - int h_src = drm_rect_height(&composer->src) >> 16; - int w_src = drm_rect_width(&composer->src) >> 16; + int x_src = frame_info->src.x1 >> 16; + int y_src = frame_info->src.y1 >> 16; + int h_src = drm_rect_height(&frame_info->src) >> 16; + int w_src = drm_rect_width(&frame_info->src) >> 16; for (y = y_src; y < y_src + h_src; ++y) { for (x = x_src; x < x_src + w_src; ++x) { - pixel = get_pixel_from_buffer(x, y, vaddr, composer); + pixel = get_pixel_from_buffer(x, y, vaddr, frame_info); crc = crc32_le(crc, (void *)&pixel, sizeof(u32)); } } @@ -98,8 +98,8 @@ static void x_blend(const u8 *xrgb_src, u8 *xrgb_dst) * blend - blend value at vaddr_src with value at vaddr_dst * @vaddr_dst: destination address * @vaddr_src: source address - * @dst_composer: destination framebuffer's metadata - * @src_composer: source framebuffer's metadata + * @dst_frame_info: destination framebuffer's metadata + * @src_frame_info: source framebuffer's metadata * @pixel_blend: blending equation based on plane format * * Blend the vaddr_src value with the vaddr_dst value using a pixel blend @@ -111,33 +111,33 @@ static void x_blend(const u8 *xrgb_src, u8 *xrgb_dst) * pixel color values */ static void blend(void *vaddr_dst, void *vaddr_src, - struct vkms_composer *dst_composer, - struct vkms_composer *src_composer, + struct vkms_frame_info *dst_frame_info, + struct vkms_frame_info *src_frame_info, void (*pixel_blend)(const u8 *, u8 *)) { int i, j, j_dst, i_dst; int offset_src, offset_dst; u8 *pixel_dst, *pixel_src; - int x_src = src_composer->src.x1 >> 16; - int y_src = src_composer->src.y1 >> 16; + int x_src = src_frame_info->src.x1 >> 16; + int y_src = src_frame_info->src.y1 >> 16; - int x_dst = src_composer->dst.x1; - int y_dst = src_composer->dst.y1; - int h_dst = drm_rect_height(&src_composer->dst); - int w_dst = drm_rect_width(&src_composer->dst); + int x_dst = src_frame_info->dst.x1; + int y_dst = src_frame_info->dst.y1; + int h_dst = drm_rect_height(&src_frame_info->dst); + int w_dst = drm_rect_width(&src_frame_info->dst)
Re: [RESEND v6 2/9] drm: vkms: Rename `vkms_composer` to `vkms_frame_info`
Hi Mellisa, On 8/20/22 08:00, Melissa Wen wrote: On 08/19, Igor Torrente wrote: Changes the name of this struct to a more meaningful name. A name that represents better what this struct is about. Composer is the code that do the compositing of the planes. This struct contains information on the frame used in the output composition. Thus, vkms_frame_info is a better name to represent this. V5: Fix a commit message typo(Melissa Wen). Reviewed-by: Melissa Wen Signed-off-by: Igor Torrente --- drivers/gpu/drm/vkms/vkms_composer.c | 87 ++-- drivers/gpu/drm/vkms/vkms_drv.h | 6 +- drivers/gpu/drm/vkms/vkms_plane.c| 38 ++-- 3 files changed, 66 insertions(+), 65 deletions(-) diff --git a/drivers/gpu/drm/vkms/vkms_composer.c b/drivers/gpu/drm/vkms/vkms_composer.c index 775b97766e08..0aded4e87e60 100644 --- a/drivers/gpu/drm/vkms/vkms_composer.c +++ b/drivers/gpu/drm/vkms/vkms_composer.c @@ -11,11 +11,11 @@ #include "vkms_drv.h" static u32 get_pixel_from_buffer(int x, int y, const u8 *buffer, -const struct vkms_composer *composer) +const struct vkms_frame_info *frame_info) { u32 pixel; - int src_offset = composer->offset + (y * composer->pitch) - + (x * composer->cpp); + int src_offset = frame_info->offset + (y * frame_info->pitch) + + (x * frame_info->cpp); pixel = *(u32 *)&buffer[src_offset]; @@ -26,24 +26,24 @@ static u32 get_pixel_from_buffer(int x, int y, const u8 *buffer, * compute_crc - Compute CRC value on output frame * * @vaddr: address to final framebuffer - * @composer: framebuffer's metadata + * @frame_info: framebuffer's metadata * * returns CRC value computed using crc32 on the visible portion of * the final framebuffer at vaddr_out */ static uint32_t compute_crc(const u8 *vaddr, - const struct vkms_composer *composer) + const struct vkms_frame_info *frame_info) { int x, y; u32 crc = 0, pixel = 0; - int x_src = composer->src.x1 >> 16; - int y_src = composer->src.y1 >> 16; - int h_src = drm_rect_height(&composer->src) >> 16; - int w_src = drm_rect_width(&composer->src) >> 16; + int x_src = frame_info->src.x1 >> 16; + int y_src = frame_info->src.y1 >> 16; + int h_src = drm_rect_height(&frame_info->src) >> 16; + int w_src = drm_rect_width(&frame_info->src) >> 16; for (y = y_src; y < y_src + h_src; ++y) { for (x = x_src; x < x_src + w_src; ++x) { - pixel = get_pixel_from_buffer(x, y, vaddr, composer); + pixel = get_pixel_from_buffer(x, y, vaddr, frame_info); crc = crc32_le(crc, (void *)&pixel, sizeof(u32)); } } @@ -98,8 +98,8 @@ static void x_blend(const u8 *xrgb_src, u8 *xrgb_dst) * blend - blend value at vaddr_src with value at vaddr_dst * @vaddr_dst: destination address * @vaddr_src: source address - * @dst_composer: destination framebuffer's metadata - * @src_composer: source framebuffer's metadata + * @dst_frame_info: destination framebuffer's metadata + * @src_frame_info: source framebuffer's metadata * @pixel_blend: blending equation based on plane format * * Blend the vaddr_src value with the vaddr_dst value using a pixel blend @@ -111,33 +111,33 @@ static void x_blend(const u8 *xrgb_src, u8 *xrgb_dst) * pixel color values */ static void blend(void *vaddr_dst, void *vaddr_src, - struct vkms_composer *dst_composer, - struct vkms_composer *src_composer, + struct vkms_frame_info *dst_frame_info, + struct vkms_frame_info *src_frame_info, void (*pixel_blend)(const u8 *, u8 *)) { int i, j, j_dst, i_dst; int offset_src, offset_dst; u8 *pixel_dst, *pixel_src; - int x_src = src_composer->src.x1 >> 16; - int y_src = src_composer->src.y1 >> 16; + int x_src = src_frame_info->src.x1 >> 16; + int y_src = src_frame_info->src.y1 >> 16; - int x_dst = src_composer->dst.x1; - int y_dst = src_composer->dst.y1; - int h_dst = drm_rect_height(&src_composer->dst); - int w_dst = drm_rect_width(&src_composer->dst); + int x_dst = src_frame_info->dst.x1; + int y_dst = src_frame_info->dst.y1; + int h_dst = drm_rect_height(&src_frame_info->dst); + int w_dst = drm_rect_width(&src_frame_info->dst); int y_limit = y_src + h_dst; int x_limit = x_src + w_dst; for (i = y_src, i_dst = y_dst; i < y_limit; ++i) { for (j = x_src, j_dst = x_dst; j < x_limit; ++j) { - offset_dst = dst_composer->offset -+ (i_dst * dst_composer->pitch) -
Re: [RESEND v6 6/9] drm: vkms: Refactor the plane composer to accept new formats
Hi Melissa, On 8/20/22 07:51, Melissa Wen wrote: On 08/19, Igor Torrente wrote: Currently the blend function only accepts XRGB_ and ARGB_ as a color input. This patch refactors all the functions related to the plane composition to overcome this limitation. The pixels blend is done using the new internal format. And new handlers are being added to convert a specific format to/from this internal format. So the blend operation depends on these handlers to convert to this common format. The blended result, if necessary, is converted to the writeback buffer format. This patch introduces three major differences to the blend function. 1 - All the planes are blended at once. 2 - The blend calculus is done as per line instead of per pixel. 3 - It is responsible to calculates the CRC and writing the writeback buffer(if necessary). These changes allow us to allocate way less memory in the intermediate buffer to compute these operations. Because now we don't need to have the entire intermediate image lines at once, just one line is enough. | Memory consumption (output dimensions) | |:--:| | Current | This patch| |:--:|:-:| | Width * Heigth | 2 * Width | Beyond memory, we also have a minor performance benefit from all these changes. Results running the IGT[1] test `igt@kms_cursor_crc@pipe-a-cursor-512x512-onscreen` ten times: | Frametime | |:--:| | Implementation | Current | This commit | |:---:|:-:|::| | frametime range | 9~22 ms |5~17 ms | | Average | 11.4 ms |7.8 ms| [1] IGT commit id: bc3f6833a12221a46659535dac06ebb312490eb4 V2: Improves the performance drastically, by performing the operations per-line and not per-pixel(Pekka Paalanen). Minor improvements(Pekka Paalanen). V3: Changes the code to blend the planes all at once. This improves performance, memory consumption, and removes much of the weirdness of the V2(Pekka Paalanen and me). Minor improvements(Pekka Paalanen and me). V4: Rebase the code and adapt it to the new NUM_OVERLAY_PLANES constant. V5: Minor checkpatch fixes and the removal of TO-DO item(Melissa Wen). Several security/robustness improvents(Pekka Paalanen). Removes check_planes_x_bounds function and allows partial partly off-screen(Pekka Paalanen). V6: Fix a mismatch of some variable sizes (Pekka Paalanen). Several minor improvements (Pekka Paalanen). Reported-by: kernel test robot Signed-off-by: Igor Torrente --- Documentation/gpu/vkms.rst| 4 - drivers/gpu/drm/vkms/Makefile | 1 + drivers/gpu/drm/vkms/vkms_composer.c | 320 -- drivers/gpu/drm/vkms/vkms_formats.c | 155 + drivers/gpu/drm/vkms/vkms_formats.h | 12 + drivers/gpu/drm/vkms/vkms_plane.c | 3 + drivers/gpu/drm/vkms/vkms_writeback.c | 3 + 7 files changed, 317 insertions(+), 181 deletions(-) create mode 100644 drivers/gpu/drm/vkms/vkms_formats.c create mode 100644 drivers/gpu/drm/vkms/vkms_formats.h diff --git a/Documentation/gpu/vkms.rst b/Documentation/gpu/vkms.rst index 973e2d43108b..a49e4ae92653 100644 --- a/Documentation/gpu/vkms.rst +++ b/Documentation/gpu/vkms.rst @@ -118,10 +118,6 @@ Add Plane Features There's lots of plane features we could add support for: -- Clearing primary plane: clear primary plane before plane composition (at the - start) for correctness of pixel blend ops. It also guarantees alpha channel - is cleared in the target buffer for stable crc. [Good to get started] - - ARGB format on primary plane: blend the primary plane into background with translucent alpha. diff --git a/drivers/gpu/drm/vkms/Makefile b/drivers/gpu/drm/vkms/Makefile index 72f779cbfedd..1b28a6a32948 100644 --- a/drivers/gpu/drm/vkms/Makefile +++ b/drivers/gpu/drm/vkms/Makefile @@ -3,6 +3,7 @@ vkms-y := \ vkms_drv.o \ vkms_plane.o \ vkms_output.o \ + vkms_formats.o \ vkms_crtc.o \ vkms_composer.o \ vkms_writeback.o diff --git a/drivers/gpu/drm/vkms/vkms_composer.c b/drivers/gpu/drm/vkms/vkms_composer.c index b9fb408e8973..5b1a8bdd8268 100644 --- a/drivers/gpu/drm/vkms/vkms_composer.c +++ b/drivers/gpu/drm/vkms/vkms_composer.c @@ -7,204 +7,188 @@ #include #include #include +#include #include "vkms_drv.h" -static u32 get_pixel_from_buffer(int x, int y, const u8 *buffer, -const struct vkms_frame_info *frame_info) +static u16 pre_mul_blend_channel(u16 src, u16 dst, u16 alpha) { - u32 pixel; - int src_offset = frame_info->offset + (y * frame_info->pitch) - + (x * frame_info->cpp); + u32 new_color; - pixel = *(u32 *)&buffer[src_offset]; + new_color = (src * 0x + d
Re: [PATCH 6/6] drm: vkms: Refactor the plane composer to accept new formats
Hi Pekka, On 10/19/21 5:05 AM, Pekka Paalanen wrote: On Mon, 18 Oct 2021 16:26:06 -0300 Igor Matheus Andrade Torrente wrote: Hi Pekka, On 10/18/21 5:30 AM, Pekka Paalanen wrote: On Tue, 5 Oct 2021 17:16:37 -0300 Igor Matheus Andrade Torrente wrote: Currently the blend function only accepts XRGB_ and ARGB_ as a color input. This patch refactors all the functions related to the plane composition to overcome this limitation. Now the blend function receives a format handler to each plane and a blend function pointer. It will take two ARGB_1616161616 pixels, one for each handler, and will use the blend function to calculate and store the final color in the output buffer. These format handlers will receive the `vkms_composer` and a pair of coordinates. And they should return the respective pixel in the ARGB_16161616 format. The blend function will receive two ARGB_16161616 pixels, x, y, and the vkms_composer of the output buffer. The method should perform the blend operation and store output to the format aforementioned ARGB_16161616. Hi, here are some drive-by comments which you are free to take or leave. To find more efficient ways to do this, you might want research library. It's MIT licensed. IIRC, the elementary operations there operate on pixel lines (pointer + length), not individual pixels (image, x, y). Input conversion function reads and converts a whole line a time. Blending function blends two lines and writes the output into back one of the lines. Output conversion function takes a line and converts it into destination buffer. That way the elementary operations do not need to take stride into account, and blending does not need to deal with varying memory alignment FWIW. The inner loops involve much less function calls and state, probably. I was doing some rudimentary profiling, and I noticed that the code spends most of the time with the indirect system call overhead and not the actual computation. This can definitively help with it. Hi, I suppose you mean function (pointer) call, not system call? Yes, I misspelled it. Really good that you have already profiled things. All optimisations should be guided by profiling, otherwise they are just guesses (and I got lucky this time I guess). Pixman may not do exactly this, but it does something very similar. Pixman also has multiple different levels of optimisations, which may not be necessary for VKMS. It's a trade-off between speed and temporary memory consumed. You need temporary buffers for two lines, but not more (not a whole image in full intermediate precision). Further optimisation could determine whether to process whole image rows as lines, or split a row into multiple lines to stay within CPU cache size. Sorry, I didn't understand the idea of the last sentence. If an image is very wide, a single row could still be relatively large in size (bytes). If it is large enough that the working set falls out of a faster CPU cache into a slower CPU cache (or worse yet, into RAM accesses), performance may suffer and become memory bandwidth limited rather than ALU rate limited. Theoretically that can be worked around by limiting the maximum size of a line, and splitting an image row into multiple lines. Got it now, thanks. However, this is an optimisation one probably should not do until there is performance profiling data showing that it actually is useful. The optimal line size limit depends on the CPU model as well. So it's a bit far out, but something you could keep in mind just in case. OK. For now I will not deal with this complexity, and if necessary I will revisit it latter. Since it seems you are blending multiple planes into a single destination which is assumed to be opaque, you might also be able to do the multiple blends without convert-writing and read-converting to/from the destination between every plane. I think that might be possible to architect on top of the per-line operations still. I tried it. But I don't know how to do this without looking like a mess. I don't think it changes anything, but I forgot to mention that I tried it with the blend per pixel and not a per line. Dedicate one of the temporary line buffers for the destination, and instead of writing it out and loading it back for each input plane, leave it in place over all planes and write it out just once at the end. I do expect more state tracking needed. You need to iterate over the list of planes for each output row, extract only the used part of an input plane's buffer into the other temporary line buffer, and offset the destination line buffer and length to match when passing those into a blending function.+ It's exactly this state tracking that was a mess when I was trying to implement something similar. But this is one more thing that probably I will have to revisit later. It's not an obvious win but a trade-off, so profiling is again needed.
Re: [PATCH 5/6] drm: vkms: Prepare `vkms_wb_encoder_atomic_check` to accept multiple formats
Hi Thomas, On 10/19/21 4:17 AM, Thomas Zimmermann wrote: Hi Am 18.10.21 um 21:32 schrieb Igor Matheus Andrade Torrente: Hi Thomas, On 10/18/21 3:06 PM, Thomas Zimmermann wrote: Hi Am 18.10.21 um 19:41 schrieb Igor Matheus Andrade Torrente: Hello, On 10/18/21 7:14 AM, Thomas Zimmermann wrote: Hi Am 05.10.21 um 22:16 schrieb Igor Matheus Andrade Torrente: Currently, the vkms atomic check only goes through the first position of the `vkms_wb_formats` vector. This change prepares the atomic_check to check the entire vector. Signed-off-by: Igor Matheus Andrade Torrente --- drivers/gpu/drm/vkms/vkms_writeback.c | 11 ++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/vkms/vkms_writeback.c b/drivers/gpu/drm/vkms/vkms_writeback.c index 5a3e12f105dc..56978f499203 100644 --- a/drivers/gpu/drm/vkms/vkms_writeback.c +++ b/drivers/gpu/drm/vkms/vkms_writeback.c @@ -30,6 +30,8 @@ static int vkms_wb_encoder_atomic_check(struct drm_encoder *encoder, { struct drm_framebuffer *fb; const struct drm_display_mode *mode = &crtc_state->mode; + bool format_supported = false; + int i; if (!conn_state->writeback_job || !conn_state->writeback_job->fb) return 0; @@ -41,7 +43,14 @@ static int vkms_wb_encoder_atomic_check(struct drm_encoder *encoder, return -EINVAL; } - if (fb->format->format != vkms_wb_formats[0]) { + for (i = 0; i < ARRAY_SIZE(vkms_wb_formats); i++) { + if (fb->format->format == vkms_wb_formats[i]) { + format_supported = true; + break; + } + } At a minimum, this loop should be in a helper function. But more generally, I'm surprised that this isn't already covered by the DRM's atomic helpers. Ok, I can wrap it in a new function. AFAIK the DRM doesn't cover it. But I may be wrong... I couldn't find anything either. Other drivers do similar format and frambuffer checks. So I guess a helper could be implemented. All plane's are supposed to call drm_atomic_helper_check_plane_state() in their atomic_check() code. You could add a similar helper, say drm_atomic_helper_check_writeback_encoder_state(), that tests for the format and maybe other things as well. Do you think this should be done before or after this patch series? Just add it as part of this series and use it for vkms. Other drivers can adopt it later on. The rcar-du code [1] looks similar to the one in vkms. Maybe put the common tests in to the new helper. You can extract the list of supported formats from the property blob, I think. OK, Thanks! Best regards Thomas [1] https://elixir.bootlin.com/linux/v5.14.13/source/drivers/gpu/drm/rcar-du/rcar_du_writeback.c#L140 Best regards Thomas Best regards Thomas + + if (!format_supported) { DRM_DEBUG_KMS("Invalid pixel format %p4cc\n", &fb->format->format); return -EINVAL; Thanks, Igor Torrente
Re: [PATCH 5/6] drm: vkms: Prepare `vkms_wb_encoder_atomic_check` to accept multiple formats
Hi Thomas, On 10/18/21 3:06 PM, Thomas Zimmermann wrote: Hi Am 18.10.21 um 19:41 schrieb Igor Matheus Andrade Torrente: Hello, On 10/18/21 7:14 AM, Thomas Zimmermann wrote: Hi Am 05.10.21 um 22:16 schrieb Igor Matheus Andrade Torrente: Currently, the vkms atomic check only goes through the first position of the `vkms_wb_formats` vector. This change prepares the atomic_check to check the entire vector. Signed-off-by: Igor Matheus Andrade Torrente --- drivers/gpu/drm/vkms/vkms_writeback.c | 11 ++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/vkms/vkms_writeback.c b/drivers/gpu/drm/vkms/vkms_writeback.c index 5a3e12f105dc..56978f499203 100644 --- a/drivers/gpu/drm/vkms/vkms_writeback.c +++ b/drivers/gpu/drm/vkms/vkms_writeback.c @@ -30,6 +30,8 @@ static int vkms_wb_encoder_atomic_check(struct drm_encoder *encoder, { struct drm_framebuffer *fb; const struct drm_display_mode *mode = &crtc_state->mode; + bool format_supported = false; + int i; if (!conn_state->writeback_job || !conn_state->writeback_job->fb) return 0; @@ -41,7 +43,14 @@ static int vkms_wb_encoder_atomic_check(struct drm_encoder *encoder, return -EINVAL; } - if (fb->format->format != vkms_wb_formats[0]) { + for (i = 0; i < ARRAY_SIZE(vkms_wb_formats); i++) { + if (fb->format->format == vkms_wb_formats[i]) { + format_supported = true; + break; + } + } At a minimum, this loop should be in a helper function. But more generally, I'm surprised that this isn't already covered by the DRM's atomic helpers. Ok, I can wrap it in a new function. AFAIK the DRM doesn't cover it. But I may be wrong... I couldn't find anything either. Other drivers do similar format and frambuffer checks. So I guess a helper could be implemented. All plane's are supposed to call drm_atomic_helper_check_plane_state() in their atomic_check() code. You could add a similar helper, say drm_atomic_helper_check_writeback_encoder_state(), that tests for the format and maybe other things as well. Do you think this should be done before or after this patch series? Best regards Thomas Best regards Thomas + + if (!format_supported) { DRM_DEBUG_KMS("Invalid pixel format %p4cc\n", &fb->format->format); return -EINVAL; Thanks, Igor Torrente
Re: [PATCH 6/6] drm: vkms: Refactor the plane composer to accept new formats
Hi Pekka, On 10/18/21 5:30 AM, Pekka Paalanen wrote: On Tue, 5 Oct 2021 17:16:37 -0300 Igor Matheus Andrade Torrente wrote: Currently the blend function only accepts XRGB_ and ARGB_ as a color input. This patch refactors all the functions related to the plane composition to overcome this limitation. Now the blend function receives a format handler to each plane and a blend function pointer. It will take two ARGB_1616161616 pixels, one for each handler, and will use the blend function to calculate and store the final color in the output buffer. These format handlers will receive the `vkms_composer` and a pair of coordinates. And they should return the respective pixel in the ARGB_16161616 format. The blend function will receive two ARGB_16161616 pixels, x, y, and the vkms_composer of the output buffer. The method should perform the blend operation and store output to the format aforementioned ARGB_16161616. Hi, here are some drive-by comments which you are free to take or leave. To find more efficient ways to do this, you might want research Pixman library. It's MIT licensed. IIRC, the elementary operations there operate on pixel lines (pointer + length), not individual pixels (image, x, y). Input conversion function reads and converts a whole line a time. Blending function blends two lines and writes the output into back one of the lines. Output conversion function takes a line and converts it into destination buffer. That way the elementary operations do not need to take stride into account, and blending does not need to deal with varying memory alignment FWIW. The inner loops involve much less function calls and state, probably. I was doing some rudimentary profiling, and I noticed that the code spends most of the time with the indirect system call overhead and not the actual computation. This can definitively help with it. Pixman may not do exactly this, but it does something very similar. Pixman also has multiple different levels of optimisations, which may not be necessary for VKMS. It's a trade-off between speed and temporary memory consumed. You need temporary buffers for two lines, but not more (not a whole image in full intermediate precision). Further optimisation could determine whether to process whole image rows as lines, or split a row into multiple lines to stay within CPU cache size. Sorry, I didn't understand the idea of the last sentence. Since it seems you are blending multiple planes into a single destination which is assumed to be opaque, you might also be able to do the multiple blends without convert-writing and read-converting to/from the destination between every plane. I think that might be possible to architect on top of the per-line operations still. I tried it. But I don't know how to do this without looking like a mess. Does the pixman perform some similar? Signed-off-by: Igor Matheus Andrade Torrente --- drivers/gpu/drm/vkms/vkms_composer.c | 275 ++- drivers/gpu/drm/vkms/vkms_formats.h | 125 2 files changed, 271 insertions(+), 129 deletions(-) create mode 100644 drivers/gpu/drm/vkms/vkms_formats.h ... + +u64 ARGB_to_ARGB16161616(struct vkms_composer *composer, int x, int y) +{ + u8 *pixel_addr = packed_pixels_addr(composer, x, y); + + /* +* Organizes the channels in their respective positions and converts +* the 8 bits channel to 16. +* The 257 is the "conversion ratio". This number is obtained by the +* (2^16 - 1) / (2^8 - 1) division. Which, in this case, tries to get +* the best color value in a color space with more possibilities. Pixel format, not color space. > +* And a similar idea applies to others RGB color conversions. +*/ + return ((u64)pixel_addr[3] * 257) << 48 | + ((u64)pixel_addr[2] * 257) << 32 | + ((u64)pixel_addr[1] * 257) << 16 | + ((u64)pixel_addr[0] * 257); I wonder if the compiler is smart enough to choose between "mul 257" and "(v << 8) | v" operations... but that's probably totally drowning under the overhead of per (x,y) looping. I disassembled the code to check it. And looks like the compiler is replacing the multiplication with shifts and additions. ARGB_to_ARGB16161616: 0x816ad660 <+0>: imul 0x12c(%rdi),%edx 0x816ad667 <+7>: imul 0x130(%rdi),%esi 0x816ad66e <+14>:add%edx,%esi 0x816ad670 <+16>:add0x128(%rdi),%esi 0x816ad676 <+22>:movslq %esi,%rax 0x816ad679 <+25>:add0xe8(%rdi),%rax 0x816ad680 <+32>:movzbl 0x3(%rax),%ecx 0x816ad684 <+36>:movzbl 0x2(%rax),%esi 0x816ad688 <+40>:mov%rcx,%rdx 0x816ad68b <+43>:shl$
Re: [PATCH 1/6] drm: vkms: Replace the deprecated drm_mode_config_init
Hi Thomas, On 10/18/21 7:02 AM, Thomas Zimmermann wrote: Hi Am 05.10.21 um 22:16 schrieb Igor Matheus Andrade Torrente: The `drm_mode_config_init` was deprecated since c3b790e commit, and it's being replaced by the `drmm_mode_config_init`. Signed-off-by: Igor Matheus Andrade Torrente --- drivers/gpu/drm/vkms/vkms_drv.c | 5 - 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/vkms/vkms_drv.c b/drivers/gpu/drm/vkms/vkms_drv.c index 0ffe5f0e33f7..828868920494 100644 --- a/drivers/gpu/drm/vkms/vkms_drv.c +++ b/drivers/gpu/drm/vkms/vkms_drv.c @@ -140,8 +140,11 @@ static const struct drm_mode_config_helper_funcs vkms_mode_config_helpers = { static int vkms_modeset_init(struct vkms_device *vkmsdev) { struct drm_device *dev = &vkmsdev->drm; + int ret = drmm_mode_config_init(dev); + + if (ret < 0) + return ret; The style looks awkward IMHO. Rather use I don't think it's awkward. But I don't mind change it anyway. int ret ret = drmm_mode_config_init() if (ret) return ret; - drm_mode_config_init(dev); dev->mode_config.funcs = &vkms_mode_funcs; dev->mode_config.min_width = XRES_MIN; dev->mode_config.min_height = YRES_MIN;
Re: [PATCH 0/6] Refactor the vkms to accept new formats
Hi pq, On 10/18/21 4:53 AM, Pekka Paalanen wrote: On Tue, 5 Oct 2021 17:16:31 -0300 Igor Matheus Andrade Torrente wrote: XRGB to ARGB behavior = During the development, I decided to always fill the alpha channel of the output pixel whenever the conversion from a format without an alpha channel to ARGB16161616 is necessary. Therefore, I ignore the value received from the XRGB and overwrite the value with 0x. My question is, is this behavior acceptable? Hi, that is the expected behaviour. X channel values must never affect anything on screen, hence they must never affect other channels' values. You are free to completely ignore X channel values, and if your output buffer has X channel, then you are free to write (or not write, unless for security reasons) whatever into it. Great!! And thanks!!! Thanks, pq [1] https://lists.freedesktop.org/archives/igt-dev/2021-October/036125.html Igor Matheus Andrade Torrente (6): drm: vkms: Replace the deprecated drm_mode_config_init drm: vkms: Alloc the compose frame using vzalloc drm: vkms: Replace hardcoded value of `vkms_composer.map` to DRM_FORMAT_MAX_PLANES drm: vkms: Add fb information to `vkms_writeback_job` drm: vkms: Prepare `vkms_wb_encoder_atomic_check` to accept multiple formats drm: vkms: Refactor the plane composer to accept new formats drivers/gpu/drm/vkms/vkms_composer.c | 275 ++ drivers/gpu/drm/vkms/vkms_drv.c | 5 +- drivers/gpu/drm/vkms/vkms_drv.h | 12 +- drivers/gpu/drm/vkms/vkms_formats.h | 125 drivers/gpu/drm/vkms/vkms_writeback.c | 27 ++- 5 files changed, 304 insertions(+), 140 deletions(-) create mode 100644 drivers/gpu/drm/vkms/vkms_formats.h
Re: [PATCH 5/6] drm: vkms: Prepare `vkms_wb_encoder_atomic_check` to accept multiple formats
Hello, On 10/18/21 7:14 AM, Thomas Zimmermann wrote: Hi Am 05.10.21 um 22:16 schrieb Igor Matheus Andrade Torrente: Currently, the vkms atomic check only goes through the first position of the `vkms_wb_formats` vector. This change prepares the atomic_check to check the entire vector. Signed-off-by: Igor Matheus Andrade Torrente --- drivers/gpu/drm/vkms/vkms_writeback.c | 11 ++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/vkms/vkms_writeback.c b/drivers/gpu/drm/vkms/vkms_writeback.c index 5a3e12f105dc..56978f499203 100644 --- a/drivers/gpu/drm/vkms/vkms_writeback.c +++ b/drivers/gpu/drm/vkms/vkms_writeback.c @@ -30,6 +30,8 @@ static int vkms_wb_encoder_atomic_check(struct drm_encoder *encoder, { struct drm_framebuffer *fb; const struct drm_display_mode *mode = &crtc_state->mode; + bool format_supported = false; + int i; if (!conn_state->writeback_job || !conn_state->writeback_job->fb) return 0; @@ -41,7 +43,14 @@ static int vkms_wb_encoder_atomic_check(struct drm_encoder *encoder, return -EINVAL; } - if (fb->format->format != vkms_wb_formats[0]) { + for (i = 0; i < ARRAY_SIZE(vkms_wb_formats); i++) { + if (fb->format->format == vkms_wb_formats[i]) { + format_supported = true; + break; + } + } At a minimum, this loop should be in a helper function. But more generally, I'm surprised that this isn't already covered by the DRM's atomic helpers. Ok, I can wrap it in a new function. AFAIK the DRM doesn't cover it. But I may be wrong... Best regards Thomas + + if (!format_supported) { DRM_DEBUG_KMS("Invalid pixel format %p4cc\n", &fb->format->format); return -EINVAL;
[PATCH 6/6] drm: vkms: Refactor the plane composer to accept new formats
Currently the blend function only accepts XRGB_ and ARGB_ as a color input. This patch refactors all the functions related to the plane composition to overcome this limitation. Now the blend function receives a format handler to each plane and a blend function pointer. It will take two ARGB_1616161616 pixels, one for each handler, and will use the blend function to calculate and store the final color in the output buffer. These format handlers will receive the `vkms_composer` and a pair of coordinates. And they should return the respective pixel in the ARGB_16161616 format. The blend function will receive two ARGB_16161616 pixels, x, y, and the vkms_composer of the output buffer. The method should perform the blend operation and store output to the format aforementioned ARGB_16161616. Signed-off-by: Igor Matheus Andrade Torrente --- drivers/gpu/drm/vkms/vkms_composer.c | 275 ++- drivers/gpu/drm/vkms/vkms_formats.h | 125 2 files changed, 271 insertions(+), 129 deletions(-) create mode 100644 drivers/gpu/drm/vkms/vkms_formats.h diff --git a/drivers/gpu/drm/vkms/vkms_composer.c b/drivers/gpu/drm/vkms/vkms_composer.c index 82f79e508f81..1e7c10c02a52 100644 --- a/drivers/gpu/drm/vkms/vkms_composer.c +++ b/drivers/gpu/drm/vkms/vkms_composer.c @@ -9,18 +9,28 @@ #include #include "vkms_drv.h" - -static u32 get_pixel_from_buffer(int x, int y, const u8 *buffer, -const struct vkms_composer *composer) -{ - u32 pixel; - int src_offset = composer->offset + (y * composer->pitch) - + (x * composer->cpp); - - pixel = *(u32 *)&buffer[src_offset]; - - return pixel; -} +#include "vkms_formats.h" + +#define get_output_vkms_composer(buffer_pointer, composer) \ + ((struct vkms_composer) { \ + .fb = (struct drm_framebuffer) {\ + .format = &(struct drm_format_info) { \ + .format = DRM_FORMAT_ARGB16161616, \ + }, \ + }, \ + .map[0].vaddr = (buffer_pointer), \ + .src = (composer)->src, \ + .dst = (composer)->dst, \ + .cpp = sizeof(u64), \ + .pitch = drm_rect_width(&(composer)->dst) * sizeof(u64) \ + }) + +struct vkms_pixel_composition_functions { + u64 (*get_src_pixel)(struct vkms_composer *composer, int x, int y); + u64 (*get_dst_pixel)(struct vkms_composer *composer, int x, int y); + void (*pixel_blend)(u64 argb_src1, u64 argb_src2, int x, int y, + struct vkms_composer *dst_composer); +}; /** * compute_crc - Compute CRC value on output frame @@ -31,42 +41,33 @@ static u32 get_pixel_from_buffer(int x, int y, const u8 *buffer, * returns CRC value computed using crc32 on the visible portion of * the final framebuffer at vaddr_out */ -static uint32_t compute_crc(const u8 *vaddr, +static uint32_t compute_crc(const __le64 *vaddr, const struct vkms_composer *composer) { - int x, y; - u32 crc = 0, pixel = 0; - int x_src = composer->src.x1 >> 16; - int y_src = composer->src.y1 >> 16; - int h_src = drm_rect_height(&composer->src) >> 16; - int w_src = drm_rect_width(&composer->src) >> 16; - - for (y = y_src; y < y_src + h_src; ++y) { - for (x = x_src; x < x_src + w_src; ++x) { - pixel = get_pixel_from_buffer(x, y, vaddr, composer); - crc = crc32_le(crc, (void *)&pixel, sizeof(u32)); - } - } + int h = drm_rect_height(&composer->dst); + int w = drm_rect_width(&composer->dst); - return crc; + return crc32_le(0, (void *)vaddr, w * h * sizeof(u64)); } -static u8 blend_channel(u8 src, u8 dst, u8 alpha) +static __le16 blend_channel(u16 src, u16 dst, u16 alpha) { - u32 pre_blend; - u8 new_color; + u64 pre_blend; + u16 new_color; - pre_blend = (src * 255 + dst * (255 - alpha)); + pre_blend = (src * 0x + dst * (0x - alpha)); - /* Faster div by 255 */ - new_color = ((pre_blend + ((pre_blend + 257) >> 8)) >> 8); + new_color = DIV_ROUND_UP(pre_blend, 0x); - return new_color; + return cpu_to_le16(new_color); } /** * alpha_blend - alpha blending equation - * @argb_src: src pixel on premultiplied alpha mode + * @argb_src1: pixel of the source plane on premultiplied alpha mo
[PATCH 5/6] drm: vkms: Prepare `vkms_wb_encoder_atomic_check` to accept multiple formats
Currently, the vkms atomic check only goes through the first position of the `vkms_wb_formats` vector. This change prepares the atomic_check to check the entire vector. Signed-off-by: Igor Matheus Andrade Torrente --- drivers/gpu/drm/vkms/vkms_writeback.c | 11 ++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/vkms/vkms_writeback.c b/drivers/gpu/drm/vkms/vkms_writeback.c index 5a3e12f105dc..56978f499203 100644 --- a/drivers/gpu/drm/vkms/vkms_writeback.c +++ b/drivers/gpu/drm/vkms/vkms_writeback.c @@ -30,6 +30,8 @@ static int vkms_wb_encoder_atomic_check(struct drm_encoder *encoder, { struct drm_framebuffer *fb; const struct drm_display_mode *mode = &crtc_state->mode; + bool format_supported = false; + int i; if (!conn_state->writeback_job || !conn_state->writeback_job->fb) return 0; @@ -41,7 +43,14 @@ static int vkms_wb_encoder_atomic_check(struct drm_encoder *encoder, return -EINVAL; } - if (fb->format->format != vkms_wb_formats[0]) { + for (i = 0; i < ARRAY_SIZE(vkms_wb_formats); i++) { + if (fb->format->format == vkms_wb_formats[i]) { + format_supported = true; + break; + } + } + + if (!format_supported) { DRM_DEBUG_KMS("Invalid pixel format %p4cc\n", &fb->format->format); return -EINVAL; -- 2.30.2
[PATCH 4/6] drm: vkms: Add fb information to `vkms_writeback_job`
This commit is the groundwork to introduce new formats to the planes and writeback buffer. As part of it, a new buffer metadata field is added to `vkms_writeback_job`, this metadata is represented by the `vkms_composer` struct. This will allow us, in the future, to have different compositing and wb format types. Signed-off-by: Igor Matheus Andrade Torrente --- drivers/gpu/drm/vkms/vkms_drv.h | 10 +- drivers/gpu/drm/vkms/vkms_writeback.c | 16 +--- 2 files changed, 18 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/vkms/vkms_drv.h b/drivers/gpu/drm/vkms/vkms_drv.h index 64e62993b06f..d62f8ebd454b 100644 --- a/drivers/gpu/drm/vkms/vkms_drv.h +++ b/drivers/gpu/drm/vkms/vkms_drv.h @@ -20,11 +20,6 @@ #define XRES_MAX 8192 #define YRES_MAX 8192 -struct vkms_writeback_job { - struct dma_buf_map map[DRM_FORMAT_MAX_PLANES]; - struct dma_buf_map data[DRM_FORMAT_MAX_PLANES]; -}; - struct vkms_composer { struct drm_framebuffer fb; struct drm_rect src, dst; @@ -34,6 +29,11 @@ struct vkms_composer { unsigned int cpp; }; +struct vkms_writeback_job { + struct dma_buf_map data[DRM_FORMAT_MAX_PLANES]; + struct vkms_composer composer; +}; + /** * vkms_plane_state - Driver specific plane state * @base: base plane state diff --git a/drivers/gpu/drm/vkms/vkms_writeback.c b/drivers/gpu/drm/vkms/vkms_writeback.c index 8694227f555f..5a3e12f105dc 100644 --- a/drivers/gpu/drm/vkms/vkms_writeback.c +++ b/drivers/gpu/drm/vkms/vkms_writeback.c @@ -75,7 +75,7 @@ static int vkms_wb_prepare_job(struct drm_writeback_connector *wb_connector, if (!vkmsjob) return -ENOMEM; - ret = drm_gem_fb_vmap(job->fb, vkmsjob->map, vkmsjob->data); + ret = drm_gem_fb_vmap(job->fb, vkmsjob->composer.map, vkmsjob->data); if (ret) { DRM_ERROR("vmap failed: %d\n", ret); goto err_kfree; @@ -99,7 +99,7 @@ static void vkms_wb_cleanup_job(struct drm_writeback_connector *connector, if (!job->fb) return; - drm_gem_fb_vunmap(job->fb, vkmsjob->map); + drm_gem_fb_vunmap(job->fb, vkmsjob->composer.map); vkmsdev = drm_device_to_vkms_device(job->fb->dev); vkms_set_composer(&vkmsdev->output, false); @@ -116,14 +116,24 @@ static void vkms_wb_atomic_commit(struct drm_connector *conn, struct drm_writeback_connector *wb_conn = &output->wb_connector; struct drm_connector_state *conn_state = wb_conn->base.state; struct vkms_crtc_state *crtc_state = output->composer_state; + struct drm_framebuffer *fb = connector_state->writeback_job->fb; + struct vkms_writeback_job *active_wb; + struct vkms_composer *wb_composer; if (!conn_state) return; vkms_set_composer(&vkmsdev->output, true); + active_wb = conn_state->writeback_job->priv; + wb_composer = &active_wb->composer; + spin_lock_irq(&output->composer_lock); - crtc_state->active_writeback = conn_state->writeback_job->priv; + crtc_state->active_writeback = active_wb; + memcpy(&wb_composer->fb, fb, sizeof(struct drm_framebuffer)); + wb_composer->offset = fb->offsets[0]; + wb_composer->pitch = fb->pitches[0]; + wb_composer->cpp = fb->format->cpp[0]; crtc_state->wb_pending = true; spin_unlock_irq(&output->composer_lock); drm_writeback_queue_job(wb_conn, connector_state); -- 2.30.2
[PATCH 3/6] drm: vkms: Replace hardcoded value of `vkms_composer.map` to DRM_FORMAT_MAX_PLANES
The `map` vector at `vkms_composer` uses a hardcoded value to define its size. If someday the maximum number of planes increases, this hardcoded value can be a problem. This value is being replaced with the DRM_FORMAT_MAX_PLANES macro. Signed-off-by: Igor Matheus Andrade Torrente --- drivers/gpu/drm/vkms/vkms_drv.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/vkms/vkms_drv.h b/drivers/gpu/drm/vkms/vkms_drv.h index d48c23d40ce5..64e62993b06f 100644 --- a/drivers/gpu/drm/vkms/vkms_drv.h +++ b/drivers/gpu/drm/vkms/vkms_drv.h @@ -28,7 +28,7 @@ struct vkms_writeback_job { struct vkms_composer { struct drm_framebuffer fb; struct drm_rect src, dst; - struct dma_buf_map map[4]; + struct dma_buf_map map[DRM_FORMAT_MAX_PLANES]; unsigned int offset; unsigned int pitch; unsigned int cpp; -- 2.30.2
[PATCH 2/6] drm: vkms: Alloc the compose frame using vzalloc
Currently, the memory to the composition frame is being allocated using the kzmalloc. This comes with the limitation of maximum size of one page size(which in the x86_64 is 4Kb and 4MB for default and hugepage respectively). Somes test of igt (e.g. kms_plane@pixel-format) uses more than 4MB when testing some pixel formats like ARGB16161616. This problem is addessed by allocating the memory using kvzalloc that circunvents this limitation. Signed-off-by: Igor Matheus Andrade Torrente --- drivers/gpu/drm/vkms/vkms_composer.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/vkms/vkms_composer.c b/drivers/gpu/drm/vkms/vkms_composer.c index 9e8204be9a14..82f79e508f81 100644 --- a/drivers/gpu/drm/vkms/vkms_composer.c +++ b/drivers/gpu/drm/vkms/vkms_composer.c @@ -180,7 +180,7 @@ static int compose_active_planes(void **vaddr_out, int i; if (!*vaddr_out) { - *vaddr_out = kzalloc(gem_obj->size, GFP_KERNEL); + *vaddr_out = kvzalloc(gem_obj->size, GFP_KERNEL); if (!*vaddr_out) { DRM_ERROR("Cannot allocate memory for output frame."); return -ENOMEM; @@ -263,7 +263,7 @@ void vkms_composer_worker(struct work_struct *work) crtc_state); if (ret) { if (ret == -EINVAL && !wb_pending) - kfree(vaddr_out); + kvfree(vaddr_out); return; } @@ -275,7 +275,7 @@ void vkms_composer_worker(struct work_struct *work) crtc_state->wb_pending = false; spin_unlock_irq(&out->composer_lock); } else { - kfree(vaddr_out); + kvfree(vaddr_out); } /* -- 2.30.2
[PATCH 1/6] drm: vkms: Replace the deprecated drm_mode_config_init
The `drm_mode_config_init` was deprecated since c3b790e commit, and it's being replaced by the `drmm_mode_config_init`. Signed-off-by: Igor Matheus Andrade Torrente --- drivers/gpu/drm/vkms/vkms_drv.c | 5 - 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/vkms/vkms_drv.c b/drivers/gpu/drm/vkms/vkms_drv.c index 0ffe5f0e33f7..828868920494 100644 --- a/drivers/gpu/drm/vkms/vkms_drv.c +++ b/drivers/gpu/drm/vkms/vkms_drv.c @@ -140,8 +140,11 @@ static const struct drm_mode_config_helper_funcs vkms_mode_config_helpers = { static int vkms_modeset_init(struct vkms_device *vkmsdev) { struct drm_device *dev = &vkmsdev->drm; + int ret = drmm_mode_config_init(dev); + + if (ret < 0) + return ret; - drm_mode_config_init(dev); dev->mode_config.funcs = &vkms_mode_funcs; dev->mode_config.min_width = XRES_MIN; dev->mode_config.min_height = YRES_MIN; -- 2.30.2
[PATCH 0/6] Refactor the vkms to accept new formats
Summary === This series of patches refactor some vkms components in order to introduce new formats to the planes and writeback connector. Now in the blend function, the plane's pixels are converted to ARGB16161616 and then blended together. The CRC is calculated based on the ARGB1616161616 buffer. And if required, this buffer is copied/converted to the writeback buffer format. And to handle the pixel conversion, new functions were added to convert from a specific format to ARGB16161616 (the reciprocal is also true). Tests = This patch series was tested using the following igt tests: -t ".*kms_plane.*" -t ".*kms_writeback.*" -t ".*kms_cursor_crc*" New tests passing --- - pipe-A-cursor-size-change - pipe-A-cursor-alpha-transparent Tests and Performance Regressions - This pack of tests is failing: - pipe-A-cursor-*-rapid-movement This is expected since there are more operations per pixel than before. And consequently, the compositing is way slower than before. My micro-profiling shows these ranges to the completion of each compositing in the .*kms_cursor_crc.* tests: | Frametime | |:---:|::| | before | after | | 8~20 ms | 32~56 ms | Hence, further optimizations will be required. Writeback test --- During the development of this patch series, I discovered that the writeback-check-output test wasn't filling the plane correctly. So, currently, this patch series is failing in this test. But I sent a patch to igt to fix it[1]. XRGB to ARGB behavior = During the development, I decided to always fill the alpha channel of the output pixel whenever the conversion from a format without an alpha channel to ARGB16161616 is necessary. Therefore, I ignore the value received from the XRGB and overwrite the value with 0x. My question is, is this behavior acceptable? [1] https://lists.freedesktop.org/archives/igt-dev/2021-October/036125.html Igor Matheus Andrade Torrente (6): drm: vkms: Replace the deprecated drm_mode_config_init drm: vkms: Alloc the compose frame using vzalloc drm: vkms: Replace hardcoded value of `vkms_composer.map` to DRM_FORMAT_MAX_PLANES drm: vkms: Add fb information to `vkms_writeback_job` drm: vkms: Prepare `vkms_wb_encoder_atomic_check` to accept multiple formats drm: vkms: Refactor the plane composer to accept new formats drivers/gpu/drm/vkms/vkms_composer.c | 275 ++ drivers/gpu/drm/vkms/vkms_drv.c | 5 +- drivers/gpu/drm/vkms/vkms_drv.h | 12 +- drivers/gpu/drm/vkms/vkms_formats.h | 125 drivers/gpu/drm/vkms/vkms_writeback.c | 27 ++- 5 files changed, 304 insertions(+), 140 deletions(-) create mode 100644 drivers/gpu/drm/vkms/vkms_formats.h -- 2.30.2
Re: VKMS: New plane formats
On 9/1/21 5:24 PM, Simon Ser wrote: Ideally the final composition format would have enough precision for all of the planes. I think it'd make sense to use ARGB16161616 if the primary plane uses ARGB and an overlay plane uses ARGB16161616. To simplify the code, maybe it's fine to always use ARGB16161616 for the output, and add getters which fetch an ARGB16161616 row for each supported plane format. This makes sense to me. I will try to implement this way. Thanks!
VKMS: New plane formats
Hi, I'm working to add new plane formats to vkms. But I don't know what should be the behavior in the situation that we received multiple planes with different formats from the users-space. For example, if the user chooses: - DRM_FORMAT_ARGB16161616 to the primary plane - DRM_FORMAT_ARGB to the cursor - DRM_FORMAT_YUV42 to the overlay What should be the output format that will be used to calculate the crc? DRM_FORMAT_ARGB16161616? My idea was to convert all the planes to the primary, but I'm not sure if it is the right approach. Best regards, --- Igor M. A. Torrente
[PATCH v3] drm: Align a comment block
Fix a checkpatch warning caused by a misaligned comment block. Signed-off-by: Igor Matheus Andrade Torrente --- Changes in v2: - Change subject text Changes in V3 - Fix a typo in the commit message drivers/gpu/drm/drm_gem.c | 8 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/drm_gem.c b/drivers/gpu/drm/drm_gem.c index a9e4a610445a..564acc1f4030 100644 --- a/drivers/gpu/drm/drm_gem.c +++ b/drivers/gpu/drm/drm_gem.c @@ -222,10 +222,10 @@ drm_gem_object_handle_put_unlocked(struct drm_gem_object *obj) return; /* - * Must bump handle count first as this may be the last - * ref, in which case the object would disappear before we - * checked for a name - */ +* Must bump handle count first as this may be the last +* ref, in which case the object would disappear before we +* checked for a name +*/ mutex_lock(&dev->object_name_lock); if (--obj->handle_count == 0) { -- 2.20.1 ___ dri-devel mailing list dri-devel@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/dri-devel
[PATCH v2] drm: Alligne a comment block
Fix a checkpatch warning caused by a misaligned comment block. Changes in v2: - Change subject text Signed-off-by: Igor Matheus Andrade Torrente --- drivers/gpu/drm/drm_gem.c | 8 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/drm_gem.c b/drivers/gpu/drm/drm_gem.c index a9e4a610445a..564acc1f4030 100644 --- a/drivers/gpu/drm/drm_gem.c +++ b/drivers/gpu/drm/drm_gem.c @@ -222,10 +222,10 @@ drm_gem_object_handle_put_unlocked(struct drm_gem_object *obj) return; /* - * Must bump handle count first as this may be the last - * ref, in which case the object would disappear before we - * checked for a name - */ +* Must bump handle count first as this may be the last +* ref, in which case the object would disappear before we +* checked for a name +*/ mutex_lock(&dev->object_name_lock); if (--obj->handle_count == 0) { -- 2.20.1 ___ dri-devel mailing list dri-devel@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/dri-devel
[PATCH v2] drm: Correct a typo in a function comment
Replace "pionter" with "pointer" in the drm_gem_handle_create description. Changes in v2: - Change subject text Signed-off-by: Igor Matheus Andrade Torrente --- drivers/gpu/drm/drm_gem.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/drm_gem.c b/drivers/gpu/drm/drm_gem.c index 6e960d57371e..c356379f5e97 100644 --- a/drivers/gpu/drm/drm_gem.c +++ b/drivers/gpu/drm/drm_gem.c @@ -432,7 +432,7 @@ drm_gem_handle_create_tail(struct drm_file *file_priv, * drm_gem_handle_create - create a gem handle for an object * @file_priv: drm file-private structure to register the handle for * @obj: object to register - * @handlep: pionter to return the created handle to the caller + * @handlep: pointer to return the created handle to the caller * * Create a handle for this object. This adds a handle reference to the object, * which includes a regular reference count. Callers will likely want to -- 2.20.1 ___ dri-devel mailing list dri-devel@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/dri-devel
[PATCH v2] drm: Alligne a comment block
Fix a checkpatch warning caused by a misaligned comment block. Signed-off-by: Igor Matheus Andrade Torrente --- drivers/gpu/drm/drm_gem.c | 8 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/drm_gem.c b/drivers/gpu/drm/drm_gem.c index 000fa4a1899f..6e960d57371e 100644 --- a/drivers/gpu/drm/drm_gem.c +++ b/drivers/gpu/drm/drm_gem.c @@ -222,10 +222,10 @@ drm_gem_object_handle_put_unlocked(struct drm_gem_object *obj) return; /* - * Must bump handle count first as this may be the last - * ref, in which case the object would disappear before we - * checked for a name - */ +* Must bump handle count first as this may be the last +* ref, in which case the object would disappear before we +* checked for a name +*/ mutex_lock(&dev->object_name_lock); if (--obj->handle_count == 0) { -- 2.20.1 ___ dri-devel mailing list dri-devel@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/dri-devel
[PATCH] Staging: drm_gem: Fix a misaligned comment block
Fix a checkpatch warning caused by a misaligned comment block. Signed-off-by: Igor Matheus Andrade Torrente --- drivers/gpu/drm/drm_gem.c | 8 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/drm_gem.c b/drivers/gpu/drm/drm_gem.c index 000fa4a1899f..6e960d57371e 100644 --- a/drivers/gpu/drm/drm_gem.c +++ b/drivers/gpu/drm/drm_gem.c @@ -222,10 +222,10 @@ drm_gem_object_handle_put_unlocked(struct drm_gem_object *obj) return; /* - * Must bump handle count first as this may be the last - * ref, in which case the object would disappear before we - * checked for a name - */ +* Must bump handle count first as this may be the last +* ref, in which case the object would disappear before we +* checked for a name +*/ mutex_lock(&dev->object_name_lock); if (--obj->handle_count == 0) { -- 2.20.1 ___ dri-devel mailing list dri-devel@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/dri-devel