Re: [PATCH v2] drm/vkms: fix 32bit compilation error by replacing macros

2022-09-11 Thread Igor Matheus Andrade Torrente

Reviewed-by: Igor Torrente 

On 9/10/22 16:03, Melissa Wen wrote:

Replace vkms_formats macro for fixed-point operations with functions
from drm/drm_fixed.h to do the same job and fix 32-bit compilation
errors.

v2:
- don't cast results to s32 (Igor)
- add missing drm_fixp2int conversion (Igor)

Fixes: a19c2ac9858 ("drm: vkms: Add support to the RGB565 format")
Tested-by: Sudip Mukherjee  (v1)
Reviewed-by: Alex Deucher  (v1)
Reported-by: Sudip Mukherjee 
Reported-by: kernel test robot 
Signed-off-by: Melissa Wen 
---
  drivers/gpu/drm/vkms/vkms_formats.c | 53 +++--
  1 file changed, 19 insertions(+), 34 deletions(-)

diff --git a/drivers/gpu/drm/vkms/vkms_formats.c 
b/drivers/gpu/drm/vkms/vkms_formats.c
index 300abb4d1dfe..d4950688b3f1 100644
--- a/drivers/gpu/drm/vkms/vkms_formats.c
+++ b/drivers/gpu/drm/vkms/vkms_formats.c
@@ -1,27 +1,12 @@
  // SPDX-License-Identifier: GPL-2.0+
  
-#include 

+#include 
  #include 
+#include 
+#include 
  
  #include "vkms_formats.h"
  
-/* The following macros help doing fixed point arithmetic. */

-/*
- * With Fixed-Point scale 15 we have 17 and 15 bits of integer and fractional
- * parts respectively.
- *  |     0.000    |
- * 31  0
- */
-#define SHIFT 15
-
-#define INT_TO_FIXED(a) ((a) << SHIFT)
-#define FIXED_MUL(a, b) ((s32)(((s64)(a) * (b)) >> SHIFT))
-#define FIXED_DIV(a, b) ((s32)(((s64)(a) << SHIFT) / (b)))
-/* This macro converts a fixed point number to int, and round half up it */
-#define FIXED_TO_INT_ROUND(a) (((a) + (1 << (SHIFT - 1))) >> SHIFT)
-#define INT_TO_FIXED_DIV(a, b) (FIXED_DIV(INT_TO_FIXED(a), INT_TO_FIXED(b)))
-#define INT_TO_FIXED_DIV(a, b) (FIXED_DIV(INT_TO_FIXED(a), INT_TO_FIXED(b)))
-
  static size_t pixel_offset(const struct vkms_frame_info *frame_info, int x, 
int y)
  {
return frame_info->offset + (y * frame_info->pitch)
@@ -137,19 +122,19 @@ static void RGB565_to_argb_u16(struct line_buffer 
*stage_buffer,
int x_limit = min_t(size_t, drm_rect_width(&frame_info->dst),
   stage_buffer->n_pixels);
  
-	s32 fp_rb_ratio = INT_TO_FIXED_DIV(65535, 31);

-   s32 fp_g_ratio = INT_TO_FIXED_DIV(65535, 63);
+   s64 fp_rb_ratio = drm_fixp_div(drm_int2fixp(65535), drm_int2fixp(31));
+   s64 fp_g_ratio = drm_fixp_div(drm_int2fixp(65535), drm_int2fixp(63));
  
  	for (size_t x = 0; x < x_limit; x++, src_pixels++) {

u16 rgb_565 = le16_to_cpu(*src_pixels);
-   s32 fp_r = INT_TO_FIXED((rgb_565 >> 11) & 0x1f);
-   s32 fp_g = INT_TO_FIXED((rgb_565 >> 5) & 0x3f);
-   s32 fp_b = INT_TO_FIXED(rgb_565 & 0x1f);
+   s64 fp_r = drm_int2fixp((rgb_565 >> 11) & 0x1f);
+   s64 fp_g = drm_int2fixp((rgb_565 >> 5) & 0x3f);
+   s64 fp_b = drm_int2fixp(rgb_565 & 0x1f);
  
  		out_pixels[x].a = (u16)0x;

-   out_pixels[x].r = FIXED_TO_INT_ROUND(FIXED_MUL(fp_r, 
fp_rb_ratio));
-   out_pixels[x].g = FIXED_TO_INT_ROUND(FIXED_MUL(fp_g, 
fp_g_ratio));
-   out_pixels[x].b = FIXED_TO_INT_ROUND(FIXED_MUL(fp_b, 
fp_rb_ratio));
+   out_pixels[x].r = drm_fixp2int(drm_fixp_mul(fp_r, fp_rb_ratio));
+   out_pixels[x].g = drm_fixp2int(drm_fixp_mul(fp_g, fp_g_ratio));
+   out_pixels[x].b = drm_fixp2int(drm_fixp_mul(fp_b, fp_rb_ratio));
}
  }
  
@@ -248,17 +233,17 @@ static void argb_u16_to_RGB565(struct vkms_frame_info *frame_info,

int x_limit = min_t(size_t, drm_rect_width(&frame_info->dst),
src_buffer->n_pixels);
  
-	s32 fp_rb_ratio = INT_TO_FIXED_DIV(65535, 31);

-   s32 fp_g_ratio = INT_TO_FIXED_DIV(65535, 63);
+   s64 fp_rb_ratio = drm_fixp_div(drm_int2fixp(65535), drm_int2fixp(31));
+   s64 fp_g_ratio = drm_fixp_div(drm_int2fixp(65535), drm_int2fixp(63));
  
  	for (size_t x = 0; x < x_limit; x++, dst_pixels++) {

-   s32 fp_r = INT_TO_FIXED(in_pixels[x].r);
-   s32 fp_g = INT_TO_FIXED(in_pixels[x].g);
-   s32 fp_b = INT_TO_FIXED(in_pixels[x].b);
+   s64 fp_r = drm_int2fixp(in_pixels[x].r);
+   s64 fp_g = drm_int2fixp(in_pixels[x].g);
+   s64 fp_b = drm_int2fixp(in_pixels[x].b);
  
-		u16 r = FIXED_TO_INT_ROUND(FIXED_DIV(fp_r, fp_rb_ratio));

-   u16 g = FIXED_TO_INT_ROUND(FIXED_DIV(fp_g, fp_g_ratio));
-   u16 b = FIXED_TO_INT_ROUND(FIXED_DIV(fp_b, fp_rb_ratio));
+   u16 r = drm_fixp2int(drm_fixp_div(fp_r, fp_rb_ratio));
+   u16 g = drm_fixp2int(drm_fixp_div(fp_g, fp_g_ratio));
+   u16 b = drm_fixp2int(drm_fixp_div(fp_b, fp_rb_ratio));
  
  		*dst_pixels = cpu_to_le16(r << 11 | g << 5 | b);

}




Re: [PATCH] drm/vkms: fix 32bit compilation error by replacing macros

2022-09-11 Thread Igor Matheus Andrade Torrente

On 9/10/22 16:10, Melissa Wen wrote:

On 09/09, Igor Matheus Andrade Torrente wrote:

Hi Mellisa,

Thanks for the patch fixing my mistakes.

On 9/9/22 08:41, Melissa Wen wrote:

Replace vkms_formats macros for fixed-point operations with functions
from drm/drm_fixed.h to do the same job and fix 32-bit compilation
errors.

Fixes: a19c2ac9858 ("drm: vkms: Add support to the RGB565 format")
Tested-by: Sudip Mukherjee 
Reported-by: Sudip Mukherjee 
Reported-by: kernel test robot 
Signed-off-by: Melissa Wen 
---
   drivers/gpu/drm/vkms/vkms_formats.c | 53 +++--
   1 file changed, 19 insertions(+), 34 deletions(-)

diff --git a/drivers/gpu/drm/vkms/vkms_formats.c 
b/drivers/gpu/drm/vkms/vkms_formats.c
index 300abb4d1dfe..ddcd3cfeeaac 100644
--- a/drivers/gpu/drm/vkms/vkms_formats.c
+++ b/drivers/gpu/drm/vkms/vkms_formats.c
@@ -1,27 +1,12 @@
   // SPDX-License-Identifier: GPL-2.0+
-#include 
+#include 
   #include 
+#include 
+#include 
   #include "vkms_formats.h"
-/* The following macros help doing fixed point arithmetic. */
-/*
- * With Fixed-Point scale 15 we have 17 and 15 bits of integer and fractional
- * parts respectively.
- *  |     0.000    |
- * 31  0
- */
-#define SHIFT 15
-
-#define INT_TO_FIXED(a) ((a) << SHIFT)
-#define FIXED_MUL(a, b) ((s32)(((s64)(a) * (b)) >> SHIFT))
-#define FIXED_DIV(a, b) ((s32)(((s64)(a) << SHIFT) / (b)))
-/* This macro converts a fixed point number to int, and round half up it */
-#define FIXED_TO_INT_ROUND(a) (((a) + (1 << (SHIFT - 1))) >> SHIFT)
-#define INT_TO_FIXED_DIV(a, b) (FIXED_DIV(INT_TO_FIXED(a), INT_TO_FIXED(b)))
-#define INT_TO_FIXED_DIV(a, b) (FIXED_DIV(INT_TO_FIXED(a), INT_TO_FIXED(b)))
-
   static size_t pixel_offset(const struct vkms_frame_info *frame_info, int x, 
int y)
   {
return frame_info->offset + (y * frame_info->pitch)
@@ -137,19 +122,19 @@ static void RGB565_to_argb_u16(struct line_buffer 
*stage_buffer,
int x_limit = min_t(size_t, drm_rect_width(&frame_info->dst),
   stage_buffer->n_pixels);
-   s32 fp_rb_ratio = INT_TO_FIXED_DIV(65535, 31);
-   s32 fp_g_ratio = INT_TO_FIXED_DIV(65535, 63);
+   s32 fp_rb_ratio = drm_fixp_div(drm_int2fixp(65535), 31);
+   s32 fp_g_ratio = drm_fixp_div(drm_int2fixp(65535), 63);


I think you need to add `drm_int2fixp` to 31 and 63.


for (size_t x = 0; x < x_limit; x++, src_pixels++) {
u16 rgb_565 = le16_to_cpu(*src_pixels);
-   s32 fp_r = INT_TO_FIXED((rgb_565 >> 11) & 0x1f);
-   s32 fp_g = INT_TO_FIXED((rgb_565 >> 5) & 0x3f);
-   s32 fp_b = INT_TO_FIXED(rgb_565 & 0x1f);
+   s32 fp_r = drm_int2fixp((rgb_565 >> 11) & 0x1f);
+   s32 fp_g = drm_int2fixp((rgb_565 >> 5) & 0x3f);
+   s32 fp_b = drm_int2fixp(rgb_565 & 0x1f);


And we are cast implicitly from 64 bits int to 32 bits which is
implementation-defined AFAIK. So, probably we should be using `s64` for all
of these variables.

I tested the patch. And I'm seeing some differences in the intermediate
results. From my testing, these changes solve those differences.


Hi Igor,

Thanks for checking the calc results and all inputs provided.  I just
sent a second version, can you take a look? I replicated your
suggestions for RGB565_to_argb_u16() in argb_u16_to_RGB565() and
double-checked for i386 and arm. Let me know what yexiuou think.


I tested it here and everything seem to be working :).





Another thing that may have an impact on the final output is the lack of
rounding in drm_fixed.h. This can potentially produce the wrong result.


Yeah, I see... I can include a comment about the rounding issue for
further improvements, or do you plan to work on it?


A comment would be good. Maybe add to the VKMS TODO list?

I'm busy with other stuffs, and can't work on this any time soon. But 
It's pretty simple thing to implement.




Thanks,

Melissa


Thanks,
---
Igor Torrente


out_pixels[x].a = (u16)0x;
-   out_pixels[x].r = FIXED_TO_INT_ROUND(FIXED_MUL(fp_r, 
fp_rb_ratio));
-   out_pixels[x].g = FIXED_TO_INT_ROUND(FIXED_MUL(fp_g, 
fp_g_ratio));
-   out_pixels[x].b = FIXED_TO_INT_ROUND(FIXED_MUL(fp_b, 
fp_rb_ratio));
+   out_pixels[x].r = drm_fixp2int(drm_fixp_mul(fp_r, fp_rb_ratio));
+   out_pixels[x].g = drm_fixp2int(drm_fixp_mul(fp_g, fp_g_ratio));
+   out_pixels[x].b = drm_fixp2int(drm_fixp_mul(fp_b, fp_rb_ratio));
}
   }
@@ -248,17 +233,17 @@ static void argb_u16_to_RGB565(struct vkms_frame_info 
*frame_info,
int x_limit = min_t(size_t, drm_rect_width(&frame_info->dst),
src_buffer->n_pixels);
-   s32 fp_rb_ratio = INT_TO_FIXED_DIV(65535,

Re: [PATCH] drm/vkms: fix 32bit compilation error by replacing macros

2022-09-09 Thread Igor Matheus Andrade Torrente

Hi Mellisa,

Thanks for the patch fixing my mistakes.

On 9/9/22 08:41, Melissa Wen wrote:

Replace vkms_formats macros for fixed-point operations with functions
from drm/drm_fixed.h to do the same job and fix 32-bit compilation
errors.

Fixes: a19c2ac9858 ("drm: vkms: Add support to the RGB565 format")
Tested-by: Sudip Mukherjee 
Reported-by: Sudip Mukherjee 
Reported-by: kernel test robot 
Signed-off-by: Melissa Wen 
---
  drivers/gpu/drm/vkms/vkms_formats.c | 53 +++--
  1 file changed, 19 insertions(+), 34 deletions(-)

diff --git a/drivers/gpu/drm/vkms/vkms_formats.c 
b/drivers/gpu/drm/vkms/vkms_formats.c
index 300abb4d1dfe..ddcd3cfeeaac 100644
--- a/drivers/gpu/drm/vkms/vkms_formats.c
+++ b/drivers/gpu/drm/vkms/vkms_formats.c
@@ -1,27 +1,12 @@
  // SPDX-License-Identifier: GPL-2.0+
  
-#include 

+#include 
  #include 
+#include 
+#include 
  
  #include "vkms_formats.h"
  
-/* The following macros help doing fixed point arithmetic. */

-/*
- * With Fixed-Point scale 15 we have 17 and 15 bits of integer and fractional
- * parts respectively.
- *  |     0.000    |
- * 31  0
- */
-#define SHIFT 15
-
-#define INT_TO_FIXED(a) ((a) << SHIFT)
-#define FIXED_MUL(a, b) ((s32)(((s64)(a) * (b)) >> SHIFT))
-#define FIXED_DIV(a, b) ((s32)(((s64)(a) << SHIFT) / (b)))
-/* This macro converts a fixed point number to int, and round half up it */
-#define FIXED_TO_INT_ROUND(a) (((a) + (1 << (SHIFT - 1))) >> SHIFT)
-#define INT_TO_FIXED_DIV(a, b) (FIXED_DIV(INT_TO_FIXED(a), INT_TO_FIXED(b)))
-#define INT_TO_FIXED_DIV(a, b) (FIXED_DIV(INT_TO_FIXED(a), INT_TO_FIXED(b)))
-
  static size_t pixel_offset(const struct vkms_frame_info *frame_info, int x, 
int y)
  {
return frame_info->offset + (y * frame_info->pitch)
@@ -137,19 +122,19 @@ static void RGB565_to_argb_u16(struct line_buffer 
*stage_buffer,
int x_limit = min_t(size_t, drm_rect_width(&frame_info->dst),
   stage_buffer->n_pixels);
  
-	s32 fp_rb_ratio = INT_TO_FIXED_DIV(65535, 31);

-   s32 fp_g_ratio = INT_TO_FIXED_DIV(65535, 63);
+   s32 fp_rb_ratio = drm_fixp_div(drm_int2fixp(65535), 31);
+   s32 fp_g_ratio = drm_fixp_div(drm_int2fixp(65535), 63);


I think you need to add `drm_int2fixp` to 31 and 63.

  
  	for (size_t x = 0; x < x_limit; x++, src_pixels++) {

u16 rgb_565 = le16_to_cpu(*src_pixels);
-   s32 fp_r = INT_TO_FIXED((rgb_565 >> 11) & 0x1f);
-   s32 fp_g = INT_TO_FIXED((rgb_565 >> 5) & 0x3f);
-   s32 fp_b = INT_TO_FIXED(rgb_565 & 0x1f);
+   s32 fp_r = drm_int2fixp((rgb_565 >> 11) & 0x1f);
+   s32 fp_g = drm_int2fixp((rgb_565 >> 5) & 0x3f);
+   s32 fp_b = drm_int2fixp(rgb_565 & 0x1f);


And we are cast implicitly from 64 bits int to 32 bits which is 
implementation-defined AFAIK. So, probably we should be using `s64` for 
all of these variables.


I tested the patch. And I'm seeing some differences in the intermediate 
results. From my testing, these changes solve those differences.


Another thing that may have an impact on the final output is the lack of 
rounding in drm_fixed.h. This can potentially produce the wrong result.


Thanks,
---
Igor Torrente

  
  		out_pixels[x].a = (u16)0x;

-   out_pixels[x].r = FIXED_TO_INT_ROUND(FIXED_MUL(fp_r, 
fp_rb_ratio));
-   out_pixels[x].g = FIXED_TO_INT_ROUND(FIXED_MUL(fp_g, 
fp_g_ratio));
-   out_pixels[x].b = FIXED_TO_INT_ROUND(FIXED_MUL(fp_b, 
fp_rb_ratio));
+   out_pixels[x].r = drm_fixp2int(drm_fixp_mul(fp_r, fp_rb_ratio));
+   out_pixels[x].g = drm_fixp2int(drm_fixp_mul(fp_g, fp_g_ratio));
+   out_pixels[x].b = drm_fixp2int(drm_fixp_mul(fp_b, fp_rb_ratio));
}
  }
  
@@ -248,17 +233,17 @@ static void argb_u16_to_RGB565(struct vkms_frame_info *frame_info,

int x_limit = min_t(size_t, drm_rect_width(&frame_info->dst),
src_buffer->n_pixels);
  
-	s32 fp_rb_ratio = INT_TO_FIXED_DIV(65535, 31);

-   s32 fp_g_ratio = INT_TO_FIXED_DIV(65535, 63);
+   s32 fp_rb_ratio = drm_fixp_div(drm_int2fixp(65535), 31);
+   s32 fp_g_ratio = drm_fixp_div(drm_int2fixp(65535), 63);
  
  	for (size_t x = 0; x < x_limit; x++, dst_pixels++) {

-   s32 fp_r = INT_TO_FIXED(in_pixels[x].r);
-   s32 fp_g = INT_TO_FIXED(in_pixels[x].g);
-   s32 fp_b = INT_TO_FIXED(in_pixels[x].b);
+   s32 fp_r = drm_int2fixp(in_pixels[x].r);
+   s32 fp_g = drm_int2fixp(in_pixels[x].g);
+   s32 fp_b = drm_int2fixp(in_pixels[x].b);
  
-		u16 r = FIXED_TO_INT_ROUND(FIXED_DIV(fp_r, fp_rb_ratio));

-   u16 g = FIXED_TO_INT_ROUND(FIXED_DIV(fp_g, fp_g_ratio));
-   u16 b = FIXED_TO_INT_ROUND(FIXED_DIV(fp_b, fp_rb_ratio));
+   u16 r = drm_fixp2int(drm_fixp_div(fp_r, fp_rb_ratio));
+

Re: build failure of next-20220906 due to 396369d67549 ("drm: vkms: Add support to the RGB565 format")

2022-09-06 Thread Igor Matheus Andrade Torrente

On 9/6/22 18:26, Sudip Mukherjee wrote:

On Tue, Sep 6, 2022 at 4:59 PM Sudip Mukherjee (Codethink)
 wrote:


Hi All,

The builds of next-20220906 fails for mips, xtensa and arm allmodconfig.

The errors in mips and xtensa are:

ERROR: modpost: "__divdi3" [drivers/gpu/drm/vkms/vkms.ko] undefined!
ERROR: modpost: "__udivdi3" [drivers/gpu/drm/vkms/vkms.ko] undefined!

The error in arm is:

ERROR: modpost: "__aeabi_uldivmod" [drivers/gpu/drm/vkms/vkms.ko] undefined!
ERROR: modpost: "__aeabi_ldivmod" [drivers/gpu/drm/vkms/vkms.ko] undefined!


Trying to do a git bisect to find out the offending commit.


git bisect points to 396369d67549 ("drm: vkms: Add support to the
RGB565 format")


Are these architectures incapable of doing 64bits int division?








Re: [RESEND v6 6/9] drm: vkms: Refactor the plane composer to accept new formats

2022-08-22 Thread Igor Matheus Andrade Torrente

On 8/22/22 16:01, Melissa Wen wrote:

On 08/22, Igor Matheus Andrade Torrente wrote:

Hi Melissa,

On 8/20/22 07:51, Melissa Wen wrote:

On 08/19, Igor Torrente wrote:

Currently the blend function only accepts XRGB_ and ARGB_
as a color input.

This patch refactors all the functions related to the plane composition
to overcome this limitation.

The pixels blend is done using the new internal format. And new handlers
are being added to convert a specific format to/from this internal format.

So the blend operation depends on these handlers to convert to this common
format. The blended result, if necessary, is converted to the writeback
buffer format.

This patch introduces three major differences to the blend function.
1 - All the planes are blended at once.
2 - The blend calculus is done as per line instead of per pixel.
3 - It is responsible to calculates the CRC and writing the writeback
buffer(if necessary).

These changes allow us to allocate way less memory in the intermediate
buffer to compute these operations. Because now we don't need to
have the entire intermediate image lines at once, just one line is
enough.

| Memory consumption (output dimensions) |
|:--:|
|   Current  | This patch|
|:--:|:-:|
|   Width * Heigth   | 2 * Width |

Beyond memory, we also have a minor performance benefit from all
these changes. Results running the IGT[1] test
`igt@kms_cursor_crc@pipe-a-cursor-512x512-onscreen` ten times:

| Frametime  |
|:--:|
|  Implementation |  Current  |  This commit |
|:---:|:-:|::|
| frametime range |  9~22 ms  |5~17 ms   |
| Average |  11.4 ms  |7.8 ms|

[1] IGT commit id: bc3f6833a12221a46659535dac06ebb312490eb4

V2: Improves the performance drastically, by performing the operations
  per-line and not per-pixel(Pekka Paalanen).
  Minor improvements(Pekka Paalanen).
V3: Changes the code to blend the planes all at once. This improves
  performance, memory consumption, and removes much of the weirdness
  of the V2(Pekka Paalanen and me).
  Minor improvements(Pekka Paalanen and me).
V4: Rebase the code and adapt it to the new NUM_OVERLAY_PLANES constant.
V5: Minor checkpatch fixes and the removal of TO-DO item(Melissa Wen).
  Several security/robustness improvents(Pekka Paalanen).
  Removes check_planes_x_bounds function and allows partial
  partly off-screen(Pekka Paalanen).
V6: Fix a mismatch of some variable sizes (Pekka Paalanen).
  Several minor improvements (Pekka Paalanen).

Reported-by: kernel test robot 
Signed-off-by: Igor Torrente 
---
   Documentation/gpu/vkms.rst|   4 -
   drivers/gpu/drm/vkms/Makefile |   1 +
   drivers/gpu/drm/vkms/vkms_composer.c  | 320 --
   drivers/gpu/drm/vkms/vkms_formats.c   | 155 +
   drivers/gpu/drm/vkms/vkms_formats.h   |  12 +
   drivers/gpu/drm/vkms/vkms_plane.c |   3 +
   drivers/gpu/drm/vkms/vkms_writeback.c |   3 +
   7 files changed, 317 insertions(+), 181 deletions(-)
   create mode 100644 drivers/gpu/drm/vkms/vkms_formats.c
   create mode 100644 drivers/gpu/drm/vkms/vkms_formats.h

diff --git a/Documentation/gpu/vkms.rst b/Documentation/gpu/vkms.rst
index 973e2d43108b..a49e4ae92653 100644
--- a/Documentation/gpu/vkms.rst
+++ b/Documentation/gpu/vkms.rst
@@ -118,10 +118,6 @@ Add Plane Features
   There's lots of plane features we could add support for:
-- Clearing primary plane: clear primary plane before plane composition (at the
-  start) for correctness of pixel blend ops. It also guarantees alpha channel
-  is cleared in the target buffer for stable crc. [Good to get started]
-
   - ARGB format on primary plane: blend the primary plane into background with
 translucent alpha.
diff --git a/drivers/gpu/drm/vkms/Makefile b/drivers/gpu/drm/vkms/Makefile
index 72f779cbfedd..1b28a6a32948 100644
--- a/drivers/gpu/drm/vkms/Makefile
+++ b/drivers/gpu/drm/vkms/Makefile
@@ -3,6 +3,7 @@ vkms-y := \
vkms_drv.o \
vkms_plane.o \
vkms_output.o \
+   vkms_formats.o \
vkms_crtc.o \
vkms_composer.o \
vkms_writeback.o
diff --git a/drivers/gpu/drm/vkms/vkms_composer.c 
b/drivers/gpu/drm/vkms/vkms_composer.c
index b9fb408e8973..5b1a8bdd8268 100644
--- a/drivers/gpu/drm/vkms/vkms_composer.c
+++ b/drivers/gpu/drm/vkms/vkms_composer.c
@@ -7,204 +7,188 @@
   #include 
   #include 
   #include 
+#include 
   #include "vkms_drv.h"
-static u32 get_pixel_from_buffer(int x, int y, const u8 *buffer,
-const struct vkms_frame_info *frame_info)
+static u16 pre_mul_blend_channel(u16 src, u16 dst, u16 alpha)
   {
-   u32 pixel;
-   int src_offset = frame_info->offset + (y * frame_info->pitch)
-   

Re: [RESEND v6 2/9] drm: vkms: Rename `vkms_composer` to `vkms_frame_info`

2022-08-22 Thread Igor Matheus Andrade Torrente

On 8/22/22 15:37, Melissa Wen wrote:

On 08/22, Igor Matheus Andrade Torrente wrote:

Hi Mellisa,

On 8/20/22 08:00, Melissa Wen wrote:

On 08/19, Igor Torrente wrote:

Changes the name of this struct to a more meaningful name.
A name that represents better what this struct is about.

Composer is the code that do the compositing of the planes.
This struct contains information on the frame used in the output
composition. Thus, vkms_frame_info is a better name to represent
this.

V5: Fix a commit message typo(Melissa Wen).

Reviewed-by: Melissa Wen 
Signed-off-by: Igor Torrente 
---
   drivers/gpu/drm/vkms/vkms_composer.c | 87 ++--
   drivers/gpu/drm/vkms/vkms_drv.h  |  6 +-
   drivers/gpu/drm/vkms/vkms_plane.c| 38 ++--
   3 files changed, 66 insertions(+), 65 deletions(-)

diff --git a/drivers/gpu/drm/vkms/vkms_composer.c 
b/drivers/gpu/drm/vkms/vkms_composer.c
index 775b97766e08..0aded4e87e60 100644
--- a/drivers/gpu/drm/vkms/vkms_composer.c
+++ b/drivers/gpu/drm/vkms/vkms_composer.c
@@ -11,11 +11,11 @@
   #include "vkms_drv.h"
   static u32 get_pixel_from_buffer(int x, int y, const u8 *buffer,
-const struct vkms_composer *composer)
+const struct vkms_frame_info *frame_info)
   {
u32 pixel;
-   int src_offset = composer->offset + (y * composer->pitch)
- + (x * composer->cpp);
+   int src_offset = frame_info->offset + (y * frame_info->pitch)
+   + (x * frame_info->cpp);
pixel = *(u32 *)&buffer[src_offset];
@@ -26,24 +26,24 @@ static u32 get_pixel_from_buffer(int x, int y, const u8 
*buffer,
* compute_crc - Compute CRC value on output frame
*
* @vaddr: address to final framebuffer
- * @composer: framebuffer's metadata
+ * @frame_info: framebuffer's metadata
*
* returns CRC value computed using crc32 on the visible portion of
* the final framebuffer at vaddr_out
*/
   static uint32_t compute_crc(const u8 *vaddr,
-   const struct vkms_composer *composer)
+   const struct vkms_frame_info *frame_info)
   {
int x, y;
u32 crc = 0, pixel = 0;
-   int x_src = composer->src.x1 >> 16;
-   int y_src = composer->src.y1 >> 16;
-   int h_src = drm_rect_height(&composer->src) >> 16;
-   int w_src = drm_rect_width(&composer->src) >> 16;
+   int x_src = frame_info->src.x1 >> 16;
+   int y_src = frame_info->src.y1 >> 16;
+   int h_src = drm_rect_height(&frame_info->src) >> 16;
+   int w_src = drm_rect_width(&frame_info->src) >> 16;
for (y = y_src; y < y_src + h_src; ++y) {
for (x = x_src; x < x_src + w_src; ++x) {
-   pixel = get_pixel_from_buffer(x, y, vaddr, composer);
+   pixel = get_pixel_from_buffer(x, y, vaddr, frame_info);
crc = crc32_le(crc, (void *)&pixel, sizeof(u32));
}
}
@@ -98,8 +98,8 @@ static void x_blend(const u8 *xrgb_src, u8 *xrgb_dst)
* blend - blend value at vaddr_src with value at vaddr_dst
* @vaddr_dst: destination address
* @vaddr_src: source address
- * @dst_composer: destination framebuffer's metadata
- * @src_composer: source framebuffer's metadata
+ * @dst_frame_info: destination framebuffer's metadata
+ * @src_frame_info: source framebuffer's metadata
* @pixel_blend: blending equation based on plane format
*
* Blend the vaddr_src value with the vaddr_dst value using a pixel blend
@@ -111,33 +111,33 @@ static void x_blend(const u8 *xrgb_src, u8 *xrgb_dst)
* pixel color values
*/
   static void blend(void *vaddr_dst, void *vaddr_src,
- struct vkms_composer *dst_composer,
- struct vkms_composer *src_composer,
+ struct vkms_frame_info *dst_frame_info,
+ struct vkms_frame_info *src_frame_info,
  void (*pixel_blend)(const u8 *, u8 *))
   {
int i, j, j_dst, i_dst;
int offset_src, offset_dst;
u8 *pixel_dst, *pixel_src;
-   int x_src = src_composer->src.x1 >> 16;
-   int y_src = src_composer->src.y1 >> 16;
+   int x_src = src_frame_info->src.x1 >> 16;
+   int y_src = src_frame_info->src.y1 >> 16;
-   int x_dst = src_composer->dst.x1;
-   int y_dst = src_composer->dst.y1;
-   int h_dst = drm_rect_height(&src_composer->dst);
-   int w_dst = drm_rect_width(&src_composer->dst);
+   int x_dst = src_frame_info->dst.x1;
+   int y_dst = src_frame_info->dst.y1;
+   int h_dst = drm_rect_height(&src_frame_info->dst);
+   int w_dst = drm_rect_width(&src_frame_info->dst)

Re: [RESEND v6 2/9] drm: vkms: Rename `vkms_composer` to `vkms_frame_info`

2022-08-22 Thread Igor Matheus Andrade Torrente

Hi Mellisa,

On 8/20/22 08:00, Melissa Wen wrote:

On 08/19, Igor Torrente wrote:

Changes the name of this struct to a more meaningful name.
A name that represents better what this struct is about.

Composer is the code that do the compositing of the planes.
This struct contains information on the frame used in the output
composition. Thus, vkms_frame_info is a better name to represent
this.

V5: Fix a commit message typo(Melissa Wen).

Reviewed-by: Melissa Wen 
Signed-off-by: Igor Torrente 
---
  drivers/gpu/drm/vkms/vkms_composer.c | 87 ++--
  drivers/gpu/drm/vkms/vkms_drv.h  |  6 +-
  drivers/gpu/drm/vkms/vkms_plane.c| 38 ++--
  3 files changed, 66 insertions(+), 65 deletions(-)

diff --git a/drivers/gpu/drm/vkms/vkms_composer.c 
b/drivers/gpu/drm/vkms/vkms_composer.c
index 775b97766e08..0aded4e87e60 100644
--- a/drivers/gpu/drm/vkms/vkms_composer.c
+++ b/drivers/gpu/drm/vkms/vkms_composer.c
@@ -11,11 +11,11 @@
  #include "vkms_drv.h"
  
  static u32 get_pixel_from_buffer(int x, int y, const u8 *buffer,

-const struct vkms_composer *composer)
+const struct vkms_frame_info *frame_info)
  {
u32 pixel;
-   int src_offset = composer->offset + (y * composer->pitch)
- + (x * composer->cpp);
+   int src_offset = frame_info->offset + (y * frame_info->pitch)
+   + (x * frame_info->cpp);
  
  	pixel = *(u32 *)&buffer[src_offset];
  
@@ -26,24 +26,24 @@ static u32 get_pixel_from_buffer(int x, int y, const u8 *buffer,

   * compute_crc - Compute CRC value on output frame
   *
   * @vaddr: address to final framebuffer
- * @composer: framebuffer's metadata
+ * @frame_info: framebuffer's metadata
   *
   * returns CRC value computed using crc32 on the visible portion of
   * the final framebuffer at vaddr_out
   */
  static uint32_t compute_crc(const u8 *vaddr,
-   const struct vkms_composer *composer)
+   const struct vkms_frame_info *frame_info)
  {
int x, y;
u32 crc = 0, pixel = 0;
-   int x_src = composer->src.x1 >> 16;
-   int y_src = composer->src.y1 >> 16;
-   int h_src = drm_rect_height(&composer->src) >> 16;
-   int w_src = drm_rect_width(&composer->src) >> 16;
+   int x_src = frame_info->src.x1 >> 16;
+   int y_src = frame_info->src.y1 >> 16;
+   int h_src = drm_rect_height(&frame_info->src) >> 16;
+   int w_src = drm_rect_width(&frame_info->src) >> 16;
  
  	for (y = y_src; y < y_src + h_src; ++y) {

for (x = x_src; x < x_src + w_src; ++x) {
-   pixel = get_pixel_from_buffer(x, y, vaddr, composer);
+   pixel = get_pixel_from_buffer(x, y, vaddr, frame_info);
crc = crc32_le(crc, (void *)&pixel, sizeof(u32));
}
}
@@ -98,8 +98,8 @@ static void x_blend(const u8 *xrgb_src, u8 *xrgb_dst)
   * blend - blend value at vaddr_src with value at vaddr_dst
   * @vaddr_dst: destination address
   * @vaddr_src: source address
- * @dst_composer: destination framebuffer's metadata
- * @src_composer: source framebuffer's metadata
+ * @dst_frame_info: destination framebuffer's metadata
+ * @src_frame_info: source framebuffer's metadata
   * @pixel_blend: blending equation based on plane format
   *
   * Blend the vaddr_src value with the vaddr_dst value using a pixel blend
@@ -111,33 +111,33 @@ static void x_blend(const u8 *xrgb_src, u8 *xrgb_dst)
   * pixel color values
   */
  static void blend(void *vaddr_dst, void *vaddr_src,
- struct vkms_composer *dst_composer,
- struct vkms_composer *src_composer,
+ struct vkms_frame_info *dst_frame_info,
+ struct vkms_frame_info *src_frame_info,
  void (*pixel_blend)(const u8 *, u8 *))
  {
int i, j, j_dst, i_dst;
int offset_src, offset_dst;
u8 *pixel_dst, *pixel_src;
  
-	int x_src = src_composer->src.x1 >> 16;

-   int y_src = src_composer->src.y1 >> 16;
+   int x_src = src_frame_info->src.x1 >> 16;
+   int y_src = src_frame_info->src.y1 >> 16;
  
-	int x_dst = src_composer->dst.x1;

-   int y_dst = src_composer->dst.y1;
-   int h_dst = drm_rect_height(&src_composer->dst);
-   int w_dst = drm_rect_width(&src_composer->dst);
+   int x_dst = src_frame_info->dst.x1;
+   int y_dst = src_frame_info->dst.y1;
+   int h_dst = drm_rect_height(&src_frame_info->dst);
+   int w_dst = drm_rect_width(&src_frame_info->dst);
  
  	int y_limit = y_src + h_dst;

int x_limit = x_src + w_dst;
  
  	for (i = y_src, i_dst = y_dst; i < y_limit; ++i) {

for (j = x_src, j_dst = x_dst; j < x_limit; ++j) {
-   offset_dst = dst_composer->offset
-+ (i_dst * dst_composer->pitch)
- 

Re: [RESEND v6 6/9] drm: vkms: Refactor the plane composer to accept new formats

2022-08-22 Thread Igor Matheus Andrade Torrente

Hi Melissa,

On 8/20/22 07:51, Melissa Wen wrote:

On 08/19, Igor Torrente wrote:

Currently the blend function only accepts XRGB_ and ARGB_
as a color input.

This patch refactors all the functions related to the plane composition
to overcome this limitation.

The pixels blend is done using the new internal format. And new handlers
are being added to convert a specific format to/from this internal format.

So the blend operation depends on these handlers to convert to this common
format. The blended result, if necessary, is converted to the writeback
buffer format.

This patch introduces three major differences to the blend function.
1 - All the planes are blended at once.
2 - The blend calculus is done as per line instead of per pixel.
3 - It is responsible to calculates the CRC and writing the writeback
buffer(if necessary).

These changes allow us to allocate way less memory in the intermediate
buffer to compute these operations. Because now we don't need to
have the entire intermediate image lines at once, just one line is
enough.

| Memory consumption (output dimensions) |
|:--:|
|   Current  | This patch|
|:--:|:-:|
|   Width * Heigth   | 2 * Width |

Beyond memory, we also have a minor performance benefit from all
these changes. Results running the IGT[1] test
`igt@kms_cursor_crc@pipe-a-cursor-512x512-onscreen` ten times:

| Frametime  |
|:--:|
|  Implementation |  Current  |  This commit |
|:---:|:-:|::|
| frametime range |  9~22 ms  |5~17 ms   |
| Average |  11.4 ms  |7.8 ms|

[1] IGT commit id: bc3f6833a12221a46659535dac06ebb312490eb4

V2: Improves the performance drastically, by performing the operations
 per-line and not per-pixel(Pekka Paalanen).
 Minor improvements(Pekka Paalanen).
V3: Changes the code to blend the planes all at once. This improves
 performance, memory consumption, and removes much of the weirdness
 of the V2(Pekka Paalanen and me).
 Minor improvements(Pekka Paalanen and me).
V4: Rebase the code and adapt it to the new NUM_OVERLAY_PLANES constant.
V5: Minor checkpatch fixes and the removal of TO-DO item(Melissa Wen).
 Several security/robustness improvents(Pekka Paalanen).
 Removes check_planes_x_bounds function and allows partial
 partly off-screen(Pekka Paalanen).
V6: Fix a mismatch of some variable sizes (Pekka Paalanen).
 Several minor improvements (Pekka Paalanen).

Reported-by: kernel test robot 
Signed-off-by: Igor Torrente 
---
  Documentation/gpu/vkms.rst|   4 -
  drivers/gpu/drm/vkms/Makefile |   1 +
  drivers/gpu/drm/vkms/vkms_composer.c  | 320 --
  drivers/gpu/drm/vkms/vkms_formats.c   | 155 +
  drivers/gpu/drm/vkms/vkms_formats.h   |  12 +
  drivers/gpu/drm/vkms/vkms_plane.c |   3 +
  drivers/gpu/drm/vkms/vkms_writeback.c |   3 +
  7 files changed, 317 insertions(+), 181 deletions(-)
  create mode 100644 drivers/gpu/drm/vkms/vkms_formats.c
  create mode 100644 drivers/gpu/drm/vkms/vkms_formats.h

diff --git a/Documentation/gpu/vkms.rst b/Documentation/gpu/vkms.rst
index 973e2d43108b..a49e4ae92653 100644
--- a/Documentation/gpu/vkms.rst
+++ b/Documentation/gpu/vkms.rst
@@ -118,10 +118,6 @@ Add Plane Features
  
  There's lots of plane features we could add support for:
  
-- Clearing primary plane: clear primary plane before plane composition (at the

-  start) for correctness of pixel blend ops. It also guarantees alpha channel
-  is cleared in the target buffer for stable crc. [Good to get started]
-
  - ARGB format on primary plane: blend the primary plane into background with
translucent alpha.
  
diff --git a/drivers/gpu/drm/vkms/Makefile b/drivers/gpu/drm/vkms/Makefile

index 72f779cbfedd..1b28a6a32948 100644
--- a/drivers/gpu/drm/vkms/Makefile
+++ b/drivers/gpu/drm/vkms/Makefile
@@ -3,6 +3,7 @@ vkms-y := \
vkms_drv.o \
vkms_plane.o \
vkms_output.o \
+   vkms_formats.o \
vkms_crtc.o \
vkms_composer.o \
vkms_writeback.o
diff --git a/drivers/gpu/drm/vkms/vkms_composer.c 
b/drivers/gpu/drm/vkms/vkms_composer.c
index b9fb408e8973..5b1a8bdd8268 100644
--- a/drivers/gpu/drm/vkms/vkms_composer.c
+++ b/drivers/gpu/drm/vkms/vkms_composer.c
@@ -7,204 +7,188 @@
  #include 
  #include 
  #include 
+#include 
  
  #include "vkms_drv.h"
  
-static u32 get_pixel_from_buffer(int x, int y, const u8 *buffer,

-const struct vkms_frame_info *frame_info)
+static u16 pre_mul_blend_channel(u16 src, u16 dst, u16 alpha)
  {
-   u32 pixel;
-   int src_offset = frame_info->offset + (y * frame_info->pitch)
-   + (x * frame_info->cpp);
+   u32 new_color;
  
-	pixel = *(u32 *)&buffer[src_offset];

+   new_color = (src * 0x + d

Re: [PATCH 6/6] drm: vkms: Refactor the plane composer to accept new formats

2021-10-19 Thread Igor Matheus Andrade Torrente

Hi Pekka,

On 10/19/21 5:05 AM, Pekka Paalanen wrote:

On Mon, 18 Oct 2021 16:26:06 -0300
Igor Matheus Andrade Torrente  wrote:


Hi Pekka,

On 10/18/21 5:30 AM, Pekka Paalanen wrote:

On Tue,  5 Oct 2021 17:16:37 -0300
Igor Matheus Andrade Torrente  wrote:
   

Currently the blend function only accepts XRGB_ and ARGB_
as a color input.

This patch refactors all the functions related to the plane composition
to overcome this limitation.

Now the blend function receives a format handler to each plane and a
blend function pointer. It will take two ARGB_1616161616 pixels, one
for each handler, and will use the blend function to calculate and
store the final color in the output buffer.

These format handlers will receive the `vkms_composer` and a pair of
coordinates. And they should return the respective pixel in the
ARGB_16161616 format.

The blend function will receive two ARGB_16161616 pixels, x, y, and
the vkms_composer of the output buffer. The method should perform the
blend operation and store output to the format aforementioned
ARGB_16161616.


Hi,

here are some drive-by comments which you are free to take or leave.

To find more efficient ways to do this, you might want research 
library. It's MIT licensed.


IIRC, the elementary operations there operate on pixel lines (pointer +
length), not individual pixels (image, x, y). Input conversion function
reads and converts a whole line a time. Blending function blends two
lines and writes the output into back one of the lines. Output
conversion function takes a line and converts it into destination
buffer. That way the elementary operations do not need to take stride
into account, and blending does not need to deal with varying memory
alignment FWIW. The inner loops involve much less function calls and
state, probably.


I was doing some rudimentary profiling, and I noticed that the code
spends most of the time with the indirect system call overhead and not
the actual computation. This can definitively help with it.


Hi,

I suppose you mean function (pointer) call, not system call?


Yes, I misspelled it.



Really good that you have already profiled things. All optimisations
should be guided by profiling, otherwise they are just guesses (and I
got lucky this time I guess).


Pixman may not do exactly this, but it does something very similar.
Pixman also has multiple different levels of optimisations, which may
not be necessary for VKMS.

It's a trade-off between speed and temporary memory consumed. You need
temporary buffers for two lines, but not more (not a whole image in
full intermediate precision). Further optimisation could determine
whether to process whole image rows as lines, or split a row into
multiple lines to stay within CPU cache size.
   


Sorry, I didn't understand the idea of the last sentence.


If an image is very wide, a single row could still be relatively large
in size (bytes). If it is large enough that the working set falls out
of a faster CPU cache into a slower CPU cache (or worse yet, into RAM
accesses), performance may suffer and become memory bandwidth limited
rather than ALU rate limited. Theoretically that can be worked around
by limiting the maximum size of a line, and splitting an image row into
multiple lines.


Got it now, thanks.



However, this is an optimisation one probably should not do until there
is performance profiling data showing that it actually is useful. The
optimal line size limit depends on the CPU model as well. So it's a bit
far out, but something you could keep in mind just in case.


OK. For now I will not deal with this complexity, and if necessary I
will revisit it latter.




Since it seems you are blending multiple planes into a single
destination which is assumed to be opaque, you might also be able to do
the multiple blends without convert-writing and read-converting to/from
the destination between every plane. I think that might be possible to
architect on top of the per-line operations still.


I tried it. But I don't know how to do this without looking like a mess.


I don't think it changes anything, but I forgot to mention that I tried
it with the blend per pixel and not a per line.



Dedicate one of the temporary line buffers for the destination, and
instead of writing it out and loading it back for each input plane,
leave it in place over all planes and write it out just once at the end.

I do expect more state tracking needed. You need to iterate over the
list of planes for each output row, extract only the used part of an
input plane's buffer into the other temporary line buffer, and offset
the destination line buffer and length to match when passing those into
a blending function.+


It's exactly this state tracking that was a mess when I was trying to
implement something similar. But this is one more thing that probably
I will have to revisit later.



It's not an obvious win but a trade-off, so profiling is again needed.


Re: [PATCH 5/6] drm: vkms: Prepare `vkms_wb_encoder_atomic_check` to accept multiple formats

2021-10-19 Thread Igor Matheus Andrade Torrente

Hi Thomas,

On 10/19/21 4:17 AM, Thomas Zimmermann wrote:

Hi

Am 18.10.21 um 21:32 schrieb Igor Matheus Andrade Torrente:

Hi Thomas,

On 10/18/21 3:06 PM, Thomas Zimmermann wrote:

Hi

Am 18.10.21 um 19:41 schrieb Igor Matheus Andrade Torrente:

Hello,

On 10/18/21 7:14 AM, Thomas Zimmermann wrote:

Hi

Am 05.10.21 um 22:16 schrieb Igor Matheus Andrade Torrente:

Currently, the vkms atomic check only goes through the first
position of
the `vkms_wb_formats` vector.

This change prepares the atomic_check to check the entire vector.

Signed-off-by: Igor Matheus Andrade Torrente 
---
   drivers/gpu/drm/vkms/vkms_writeback.c | 11 ++-
   1 file changed, 10 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/vkms/vkms_writeback.c
b/drivers/gpu/drm/vkms/vkms_writeback.c
index 5a3e12f105dc..56978f499203 100644
--- a/drivers/gpu/drm/vkms/vkms_writeback.c
+++ b/drivers/gpu/drm/vkms/vkms_writeback.c
@@ -30,6 +30,8 @@ static int vkms_wb_encoder_atomic_check(struct
drm_encoder *encoder,
   {
   struct drm_framebuffer *fb;
   const struct drm_display_mode *mode = &crtc_state->mode;
+    bool format_supported = false;
+    int i;
   if (!conn_state->writeback_job ||
!conn_state->writeback_job->fb)
   return 0;
@@ -41,7 +43,14 @@ static int vkms_wb_encoder_atomic_check(struct
drm_encoder *encoder,
   return -EINVAL;
   }
-    if (fb->format->format != vkms_wb_formats[0]) {
+    for (i = 0; i < ARRAY_SIZE(vkms_wb_formats); i++) {
+    if (fb->format->format == vkms_wb_formats[i]) {
+    format_supported = true;
+    break;
+    }
+    }


At a minimum, this loop should be in a helper function. But more
generally, I'm surprised that this isn't already covered by the
DRM's atomic helpers.


Ok, I can wrap it in a new function.

AFAIK the DRM doesn't cover it. But I may be wrong...


I couldn't find anything either.

Other drivers do similar format and frambuffer checks. So I guess a
helper could be implemented. All plane's are supposed to call
drm_atomic_helper_check_plane_state() in their atomic_check() code.
You could add a similar helper, say
drm_atomic_helper_check_writeback_encoder_state(), that tests for the
format and maybe other things as well.


Do you think this should be done before or after this patch series?


Just add it as part of this series and use it for vkms. Other drivers
can adopt it later on. The rcar-du code [1] looks similar to the one in
vkms. Maybe put the common tests in to the new helper. You can extract
the list of supported formats from the property blob, I think.



OK, Thanks!


Best regards
Thomas

[1]
https://elixir.bootlin.com/linux/v5.14.13/source/drivers/gpu/drm/rcar-du/rcar_du_writeback.c#L140





Best regards
Thomas





Best regards
Thomas


+
+    if (!format_supported) {
   DRM_DEBUG_KMS("Invalid pixel format %p4cc\n",
     &fb->format->format);
   return -EINVAL;







Thanks,
Igor Torrente




Re: [PATCH 5/6] drm: vkms: Prepare `vkms_wb_encoder_atomic_check` to accept multiple formats

2021-10-18 Thread Igor Matheus Andrade Torrente

Hi Thomas,

On 10/18/21 3:06 PM, Thomas Zimmermann wrote:

Hi

Am 18.10.21 um 19:41 schrieb Igor Matheus Andrade Torrente:

Hello,

On 10/18/21 7:14 AM, Thomas Zimmermann wrote:

Hi

Am 05.10.21 um 22:16 schrieb Igor Matheus Andrade Torrente:
Currently, the vkms atomic check only goes through the first 
position of

the `vkms_wb_formats` vector.

This change prepares the atomic_check to check the entire vector.

Signed-off-by: Igor Matheus Andrade Torrente 
---
  drivers/gpu/drm/vkms/vkms_writeback.c | 11 ++-
  1 file changed, 10 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/vkms/vkms_writeback.c 
b/drivers/gpu/drm/vkms/vkms_writeback.c

index 5a3e12f105dc..56978f499203 100644
--- a/drivers/gpu/drm/vkms/vkms_writeback.c
+++ b/drivers/gpu/drm/vkms/vkms_writeback.c
@@ -30,6 +30,8 @@ static int vkms_wb_encoder_atomic_check(struct 
drm_encoder *encoder,

  {
  struct drm_framebuffer *fb;
  const struct drm_display_mode *mode = &crtc_state->mode;
+    bool format_supported = false;
+    int i;
  if (!conn_state->writeback_job || !conn_state->writeback_job->fb)
  return 0;
@@ -41,7 +43,14 @@ static int vkms_wb_encoder_atomic_check(struct 
drm_encoder *encoder,

  return -EINVAL;
  }
-    if (fb->format->format != vkms_wb_formats[0]) {
+    for (i = 0; i < ARRAY_SIZE(vkms_wb_formats); i++) {
+    if (fb->format->format == vkms_wb_formats[i]) {
+    format_supported = true;
+    break;
+    }
+    }


At a minimum, this loop should be in a helper function. But more 
generally, I'm surprised that this isn't already covered by the DRM's 
atomic helpers.


Ok, I can wrap it in a new function.

AFAIK the DRM doesn't cover it. But I may be wrong...


I couldn't find anything either.

Other drivers do similar format and frambuffer checks. So I guess a 
helper could be implemented. All plane's are supposed to call 
drm_atomic_helper_check_plane_state() in their atomic_check() code. You 
could add a similar helper, say 
drm_atomic_helper_check_writeback_encoder_state(), that tests for the 
format and maybe other things as well.


Do you think this should be done before or after this patch series?



Best regards
Thomas





Best regards
Thomas


+
+    if (!format_supported) {
  DRM_DEBUG_KMS("Invalid pixel format %p4cc\n",
    &fb->format->format);
  return -EINVAL;







Thanks,
Igor Torrente


Re: [PATCH 6/6] drm: vkms: Refactor the plane composer to accept new formats

2021-10-18 Thread Igor Matheus Andrade Torrente

Hi Pekka,

On 10/18/21 5:30 AM, Pekka Paalanen wrote:

On Tue,  5 Oct 2021 17:16:37 -0300
Igor Matheus Andrade Torrente  wrote:


Currently the blend function only accepts XRGB_ and ARGB_
as a color input.

This patch refactors all the functions related to the plane composition
to overcome this limitation.

Now the blend function receives a format handler to each plane and a
blend function pointer. It will take two ARGB_1616161616 pixels, one
for each handler, and will use the blend function to calculate and
store the final color in the output buffer.

These format handlers will receive the `vkms_composer` and a pair of
coordinates. And they should return the respective pixel in the
ARGB_16161616 format.

The blend function will receive two ARGB_16161616 pixels, x, y, and
the vkms_composer of the output buffer. The method should perform the
blend operation and store output to the format aforementioned
ARGB_16161616.


Hi,

here are some drive-by comments which you are free to take or leave.

To find more efficient ways to do this, you might want research Pixman
library. It's MIT licensed.

IIRC, the elementary operations there operate on pixel lines (pointer +
length), not individual pixels (image, x, y). Input conversion function
reads and converts a whole line a time. Blending function blends two
lines and writes the output into back one of the lines. Output
conversion function takes a line and converts it into destination
buffer. That way the elementary operations do not need to take stride
into account, and blending does not need to deal with varying memory
alignment FWIW. The inner loops involve much less function calls and
state, probably.


I was doing some rudimentary profiling, and I noticed that the code 
spends most of the time with the indirect system call overhead and not 
the actual computation. This can definitively help with it.




Pixman may not do exactly this, but it does something very similar.
Pixman also has multiple different levels of optimisations, which may
not be necessary for VKMS.

It's a trade-off between speed and temporary memory consumed. You need
temporary buffers for two lines, but not more (not a whole image in
full intermediate precision). Further optimisation could determine
whether to process whole image rows as lines, or split a row into
multiple lines to stay within CPU cache size.



Sorry, I didn't understand the idea of the last sentence.


Since it seems you are blending multiple planes into a single
destination which is assumed to be opaque, you might also be able to do
the multiple blends without convert-writing and read-converting to/from
the destination between every plane. I think that might be possible to
architect on top of the per-line operations still.


I tried it. But I don't know how to do this without looking like a mess. 
Does the pixman perform some similar?





Signed-off-by: Igor Matheus Andrade Torrente 
---
  drivers/gpu/drm/vkms/vkms_composer.c | 275 ++-
  drivers/gpu/drm/vkms/vkms_formats.h  | 125 
  2 files changed, 271 insertions(+), 129 deletions(-)
  create mode 100644 drivers/gpu/drm/vkms/vkms_formats.h


...


+
+u64 ARGB_to_ARGB16161616(struct vkms_composer *composer, int x, int y)
+{
+   u8 *pixel_addr = packed_pixels_addr(composer, x, y);
+
+   /*
+* Organizes the channels in their respective positions and converts
+* the 8 bits channel to 16.
+* The 257 is the "conversion ratio". This number is obtained by the
+* (2^16 - 1) / (2^8 - 1) division. Which, in this case, tries to get
+* the best color value in a color space with more possibilities.


Pixel format, not color space. >

+* And a similar idea applies to others RGB color conversions.
+*/
+   return ((u64)pixel_addr[3] * 257) << 48 |
+  ((u64)pixel_addr[2] * 257) << 32 |
+  ((u64)pixel_addr[1] * 257) << 16 |
+  ((u64)pixel_addr[0] * 257);


I wonder if the compiler is smart enough to choose between "mul 257"
and "(v << 8) | v" operations... but that's probably totally drowning
under the overhead of per (x,y) looping.


I disassembled the code to check it. And looks like the compiler is 
replacing the multiplication with shifts and additions.


ARGB_to_ARGB16161616:
   0x816ad660 <+0>: imul   0x12c(%rdi),%edx
   0x816ad667 <+7>: imul   0x130(%rdi),%esi
   0x816ad66e <+14>:add%edx,%esi
   0x816ad670 <+16>:add0x128(%rdi),%esi
   0x816ad676 <+22>:movslq %esi,%rax
   0x816ad679 <+25>:add0xe8(%rdi),%rax
   0x816ad680 <+32>:movzbl 0x3(%rax),%ecx
   0x816ad684 <+36>:movzbl 0x2(%rax),%esi
   0x816ad688 <+40>:mov%rcx,%rdx
   0x816ad68b <+43>:shl$

Re: [PATCH 1/6] drm: vkms: Replace the deprecated drm_mode_config_init

2021-10-18 Thread Igor Matheus Andrade Torrente

Hi Thomas,

On 10/18/21 7:02 AM, Thomas Zimmermann wrote:

Hi

Am 05.10.21 um 22:16 schrieb Igor Matheus Andrade Torrente:

The `drm_mode_config_init` was deprecated since c3b790e commit, and it's
being replaced by the `drmm_mode_config_init`.

Signed-off-by: Igor Matheus Andrade Torrente 
---
  drivers/gpu/drm/vkms/vkms_drv.c | 5 -
  1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/vkms/vkms_drv.c 
b/drivers/gpu/drm/vkms/vkms_drv.c

index 0ffe5f0e33f7..828868920494 100644
--- a/drivers/gpu/drm/vkms/vkms_drv.c
+++ b/drivers/gpu/drm/vkms/vkms_drv.c
@@ -140,8 +140,11 @@ static const struct drm_mode_config_helper_funcs 
vkms_mode_config_helpers = {

  static int vkms_modeset_init(struct vkms_device *vkmsdev)
  {
  struct drm_device *dev = &vkmsdev->drm;
+    int ret = drmm_mode_config_init(dev);
+
+    if (ret < 0)
+    return ret;


The style looks awkward IMHO. Rather use

I don't think it's awkward. But I don't mind change it anyway.



  int ret

  ret = drmm_mode_config_init()
  if (ret)
     return ret;


-    drm_mode_config_init(dev);
  dev->mode_config.funcs = &vkms_mode_funcs;
  dev->mode_config.min_width = XRES_MIN;
  dev->mode_config.min_height = YRES_MIN;





Re: [PATCH 0/6] Refactor the vkms to accept new formats

2021-10-18 Thread Igor Matheus Andrade Torrente

Hi pq,

On 10/18/21 4:53 AM, Pekka Paalanen wrote:

On Tue,  5 Oct 2021 17:16:31 -0300
Igor Matheus Andrade Torrente  wrote:


XRGB to ARGB behavior
=
During the development, I decided to always fill the alpha channel of
the output pixel whenever the conversion from a format without an alpha
channel to ARGB16161616 is necessary. Therefore, I ignore the value
received from the XRGB and overwrite the value with 0x.

My question is, is this behavior acceptable?


Hi,

that is the expected behaviour. X channel values must never affect
anything on screen, hence they must never affect other channels'
values. You are free to completely ignore X channel values, and if your
output buffer has X channel, then you are free to write (or not write,
unless for security reasons) whatever into it.



Great!! And thanks!!!



Thanks,
pq



[1] https://lists.freedesktop.org/archives/igt-dev/2021-October/036125.html

Igor Matheus Andrade Torrente (6):
   drm: vkms: Replace the deprecated drm_mode_config_init
   drm: vkms: Alloc the compose frame using vzalloc
   drm: vkms: Replace hardcoded value of `vkms_composer.map` to
 DRM_FORMAT_MAX_PLANES
   drm: vkms: Add fb information to `vkms_writeback_job`
   drm: vkms: Prepare `vkms_wb_encoder_atomic_check` to accept multiple
 formats
   drm: vkms: Refactor the plane composer to accept new formats

  drivers/gpu/drm/vkms/vkms_composer.c  | 275 ++
  drivers/gpu/drm/vkms/vkms_drv.c   |   5 +-
  drivers/gpu/drm/vkms/vkms_drv.h   |  12 +-
  drivers/gpu/drm/vkms/vkms_formats.h   | 125 
  drivers/gpu/drm/vkms/vkms_writeback.c |  27 ++-
  5 files changed, 304 insertions(+), 140 deletions(-)
  create mode 100644 drivers/gpu/drm/vkms/vkms_formats.h





Re: [PATCH 5/6] drm: vkms: Prepare `vkms_wb_encoder_atomic_check` to accept multiple formats

2021-10-18 Thread Igor Matheus Andrade Torrente

Hello,

On 10/18/21 7:14 AM, Thomas Zimmermann wrote:

Hi

Am 05.10.21 um 22:16 schrieb Igor Matheus Andrade Torrente:

Currently, the vkms atomic check only goes through the first position of
the `vkms_wb_formats` vector.

This change prepares the atomic_check to check the entire vector.

Signed-off-by: Igor Matheus Andrade Torrente 
---
  drivers/gpu/drm/vkms/vkms_writeback.c | 11 ++-
  1 file changed, 10 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/vkms/vkms_writeback.c 
b/drivers/gpu/drm/vkms/vkms_writeback.c

index 5a3e12f105dc..56978f499203 100644
--- a/drivers/gpu/drm/vkms/vkms_writeback.c
+++ b/drivers/gpu/drm/vkms/vkms_writeback.c
@@ -30,6 +30,8 @@ static int vkms_wb_encoder_atomic_check(struct 
drm_encoder *encoder,

  {
  struct drm_framebuffer *fb;
  const struct drm_display_mode *mode = &crtc_state->mode;
+    bool format_supported = false;
+    int i;
  if (!conn_state->writeback_job || !conn_state->writeback_job->fb)
  return 0;
@@ -41,7 +43,14 @@ static int vkms_wb_encoder_atomic_check(struct 
drm_encoder *encoder,

  return -EINVAL;
  }
-    if (fb->format->format != vkms_wb_formats[0]) {
+    for (i = 0; i < ARRAY_SIZE(vkms_wb_formats); i++) {
+    if (fb->format->format == vkms_wb_formats[i]) {
+    format_supported = true;
+    break;
+    }
+    }


At a minimum, this loop should be in a helper function. But more 
generally, I'm surprised that this isn't already covered by the DRM's 
atomic helpers.


Ok, I can wrap it in a new function.

AFAIK the DRM doesn't cover it. But I may be wrong...



Best regards
Thomas


+
+    if (!format_supported) {
  DRM_DEBUG_KMS("Invalid pixel format %p4cc\n",
    &fb->format->format);
  return -EINVAL;





[PATCH 6/6] drm: vkms: Refactor the plane composer to accept new formats

2021-10-05 Thread Igor Matheus Andrade Torrente
Currently the blend function only accepts XRGB_ and ARGB_
as a color input.

This patch refactors all the functions related to the plane composition
to overcome this limitation.

Now the blend function receives a format handler to each plane and a
blend function pointer. It will take two ARGB_1616161616 pixels, one
for each handler, and will use the blend function to calculate and
store the final color in the output buffer.

These format handlers will receive the `vkms_composer` and a pair of
coordinates. And they should return the respective pixel in the
ARGB_16161616 format.

The blend function will receive two ARGB_16161616 pixels, x, y, and
the vkms_composer of the output buffer. The method should perform the
blend operation and store output to the format aforementioned
ARGB_16161616.

Signed-off-by: Igor Matheus Andrade Torrente 
---
 drivers/gpu/drm/vkms/vkms_composer.c | 275 ++-
 drivers/gpu/drm/vkms/vkms_formats.h  | 125 
 2 files changed, 271 insertions(+), 129 deletions(-)
 create mode 100644 drivers/gpu/drm/vkms/vkms_formats.h

diff --git a/drivers/gpu/drm/vkms/vkms_composer.c 
b/drivers/gpu/drm/vkms/vkms_composer.c
index 82f79e508f81..1e7c10c02a52 100644
--- a/drivers/gpu/drm/vkms/vkms_composer.c
+++ b/drivers/gpu/drm/vkms/vkms_composer.c
@@ -9,18 +9,28 @@
 #include 
 
 #include "vkms_drv.h"
-
-static u32 get_pixel_from_buffer(int x, int y, const u8 *buffer,
-const struct vkms_composer *composer)
-{
-   u32 pixel;
-   int src_offset = composer->offset + (y * composer->pitch)
- + (x * composer->cpp);
-
-   pixel = *(u32 *)&buffer[src_offset];
-
-   return pixel;
-}
+#include "vkms_formats.h"
+
+#define get_output_vkms_composer(buffer_pointer, composer) \
+   ((struct vkms_composer) {   \
+   .fb = (struct drm_framebuffer) {\
+   .format = &(struct drm_format_info) {   \
+   .format = DRM_FORMAT_ARGB16161616,  \
+   },  \
+   },  \
+   .map[0].vaddr = (buffer_pointer),   \
+   .src = (composer)->src, \
+   .dst = (composer)->dst, \
+   .cpp = sizeof(u64), \
+   .pitch = drm_rect_width(&(composer)->dst) * sizeof(u64) \
+   })
+
+struct vkms_pixel_composition_functions {
+   u64 (*get_src_pixel)(struct vkms_composer *composer, int x, int y);
+   u64 (*get_dst_pixel)(struct vkms_composer *composer, int x, int y);
+   void (*pixel_blend)(u64 argb_src1, u64 argb_src2, int x, int y,
+   struct vkms_composer *dst_composer);
+};
 
 /**
  * compute_crc - Compute CRC value on output frame
@@ -31,42 +41,33 @@ static u32 get_pixel_from_buffer(int x, int y, const u8 
*buffer,
  * returns CRC value computed using crc32 on the visible portion of
  * the final framebuffer at vaddr_out
  */
-static uint32_t compute_crc(const u8 *vaddr,
+static uint32_t compute_crc(const __le64 *vaddr,
const struct vkms_composer *composer)
 {
-   int x, y;
-   u32 crc = 0, pixel = 0;
-   int x_src = composer->src.x1 >> 16;
-   int y_src = composer->src.y1 >> 16;
-   int h_src = drm_rect_height(&composer->src) >> 16;
-   int w_src = drm_rect_width(&composer->src) >> 16;
-
-   for (y = y_src; y < y_src + h_src; ++y) {
-   for (x = x_src; x < x_src + w_src; ++x) {
-   pixel = get_pixel_from_buffer(x, y, vaddr, composer);
-   crc = crc32_le(crc, (void *)&pixel, sizeof(u32));
-   }
-   }
+   int h = drm_rect_height(&composer->dst);
+   int w = drm_rect_width(&composer->dst);
 
-   return crc;
+   return crc32_le(0, (void *)vaddr, w * h * sizeof(u64));
 }
 
-static u8 blend_channel(u8 src, u8 dst, u8 alpha)
+static __le16 blend_channel(u16 src, u16 dst, u16 alpha)
 {
-   u32 pre_blend;
-   u8 new_color;
+   u64 pre_blend;
+   u16 new_color;
 
-   pre_blend = (src * 255 + dst * (255 - alpha));
+   pre_blend = (src * 0x + dst * (0x - alpha));
 
-   /* Faster div by 255 */
-   new_color = ((pre_blend + ((pre_blend + 257) >> 8)) >> 8);
+   new_color = DIV_ROUND_UP(pre_blend, 0x);
 
-   return new_color;
+   return cpu_to_le16(new_color);
 }
 
 /**
  * alpha_blend - alpha blending equation
- * @argb_src: src pixel on premultiplied alpha mode
+ * @argb_src1: pixel of the source plane on premultiplied alpha mo

[PATCH 5/6] drm: vkms: Prepare `vkms_wb_encoder_atomic_check` to accept multiple formats

2021-10-05 Thread Igor Matheus Andrade Torrente
Currently, the vkms atomic check only goes through the first position of
the `vkms_wb_formats` vector.

This change prepares the atomic_check to check the entire vector.

Signed-off-by: Igor Matheus Andrade Torrente 
---
 drivers/gpu/drm/vkms/vkms_writeback.c | 11 ++-
 1 file changed, 10 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/vkms/vkms_writeback.c 
b/drivers/gpu/drm/vkms/vkms_writeback.c
index 5a3e12f105dc..56978f499203 100644
--- a/drivers/gpu/drm/vkms/vkms_writeback.c
+++ b/drivers/gpu/drm/vkms/vkms_writeback.c
@@ -30,6 +30,8 @@ static int vkms_wb_encoder_atomic_check(struct drm_encoder 
*encoder,
 {
struct drm_framebuffer *fb;
const struct drm_display_mode *mode = &crtc_state->mode;
+   bool format_supported = false;
+   int i;
 
if (!conn_state->writeback_job || !conn_state->writeback_job->fb)
return 0;
@@ -41,7 +43,14 @@ static int vkms_wb_encoder_atomic_check(struct drm_encoder 
*encoder,
return -EINVAL;
}
 
-   if (fb->format->format != vkms_wb_formats[0]) {
+   for (i = 0; i < ARRAY_SIZE(vkms_wb_formats); i++) {
+   if (fb->format->format == vkms_wb_formats[i]) {
+   format_supported = true;
+   break;
+   }
+   }
+
+   if (!format_supported) {
DRM_DEBUG_KMS("Invalid pixel format %p4cc\n",
  &fb->format->format);
return -EINVAL;
-- 
2.30.2



[PATCH 4/6] drm: vkms: Add fb information to `vkms_writeback_job`

2021-10-05 Thread Igor Matheus Andrade Torrente
This commit is the groundwork to introduce new formats to the planes and
writeback buffer. As part of it, a new buffer metadata field is added to
`vkms_writeback_job`, this metadata is represented by the `vkms_composer`
struct.

This will allow us, in the future, to have different compositing and wb
format types.

Signed-off-by: Igor Matheus Andrade Torrente 
---
 drivers/gpu/drm/vkms/vkms_drv.h   | 10 +-
 drivers/gpu/drm/vkms/vkms_writeback.c | 16 +---
 2 files changed, 18 insertions(+), 8 deletions(-)

diff --git a/drivers/gpu/drm/vkms/vkms_drv.h b/drivers/gpu/drm/vkms/vkms_drv.h
index 64e62993b06f..d62f8ebd454b 100644
--- a/drivers/gpu/drm/vkms/vkms_drv.h
+++ b/drivers/gpu/drm/vkms/vkms_drv.h
@@ -20,11 +20,6 @@
 #define XRES_MAX  8192
 #define YRES_MAX  8192
 
-struct vkms_writeback_job {
-   struct dma_buf_map map[DRM_FORMAT_MAX_PLANES];
-   struct dma_buf_map data[DRM_FORMAT_MAX_PLANES];
-};
-
 struct vkms_composer {
struct drm_framebuffer fb;
struct drm_rect src, dst;
@@ -34,6 +29,11 @@ struct vkms_composer {
unsigned int cpp;
 };
 
+struct vkms_writeback_job {
+   struct dma_buf_map data[DRM_FORMAT_MAX_PLANES];
+   struct vkms_composer composer;
+};
+
 /**
  * vkms_plane_state - Driver specific plane state
  * @base: base plane state
diff --git a/drivers/gpu/drm/vkms/vkms_writeback.c 
b/drivers/gpu/drm/vkms/vkms_writeback.c
index 8694227f555f..5a3e12f105dc 100644
--- a/drivers/gpu/drm/vkms/vkms_writeback.c
+++ b/drivers/gpu/drm/vkms/vkms_writeback.c
@@ -75,7 +75,7 @@ static int vkms_wb_prepare_job(struct drm_writeback_connector 
*wb_connector,
if (!vkmsjob)
return -ENOMEM;
 
-   ret = drm_gem_fb_vmap(job->fb, vkmsjob->map, vkmsjob->data);
+   ret = drm_gem_fb_vmap(job->fb, vkmsjob->composer.map, vkmsjob->data);
if (ret) {
DRM_ERROR("vmap failed: %d\n", ret);
goto err_kfree;
@@ -99,7 +99,7 @@ static void vkms_wb_cleanup_job(struct 
drm_writeback_connector *connector,
if (!job->fb)
return;
 
-   drm_gem_fb_vunmap(job->fb, vkmsjob->map);
+   drm_gem_fb_vunmap(job->fb, vkmsjob->composer.map);
 
vkmsdev = drm_device_to_vkms_device(job->fb->dev);
vkms_set_composer(&vkmsdev->output, false);
@@ -116,14 +116,24 @@ static void vkms_wb_atomic_commit(struct drm_connector 
*conn,
struct drm_writeback_connector *wb_conn = &output->wb_connector;
struct drm_connector_state *conn_state = wb_conn->base.state;
struct vkms_crtc_state *crtc_state = output->composer_state;
+   struct drm_framebuffer *fb = connector_state->writeback_job->fb;
+   struct vkms_writeback_job *active_wb;
+   struct vkms_composer *wb_composer;
 
if (!conn_state)
return;
 
vkms_set_composer(&vkmsdev->output, true);
 
+   active_wb = conn_state->writeback_job->priv;
+   wb_composer = &active_wb->composer;
+
spin_lock_irq(&output->composer_lock);
-   crtc_state->active_writeback = conn_state->writeback_job->priv;
+   crtc_state->active_writeback = active_wb;
+   memcpy(&wb_composer->fb, fb, sizeof(struct drm_framebuffer));
+   wb_composer->offset = fb->offsets[0];
+   wb_composer->pitch = fb->pitches[0];
+   wb_composer->cpp = fb->format->cpp[0];
crtc_state->wb_pending = true;
spin_unlock_irq(&output->composer_lock);
drm_writeback_queue_job(wb_conn, connector_state);
-- 
2.30.2



[PATCH 3/6] drm: vkms: Replace hardcoded value of `vkms_composer.map` to DRM_FORMAT_MAX_PLANES

2021-10-05 Thread Igor Matheus Andrade Torrente
The `map` vector at `vkms_composer` uses a hardcoded value to define its
size.

If someday the maximum number of planes increases, this hardcoded value
can be a problem.

This value is being replaced with the DRM_FORMAT_MAX_PLANES macro.

Signed-off-by: Igor Matheus Andrade Torrente 
---
 drivers/gpu/drm/vkms/vkms_drv.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/vkms/vkms_drv.h b/drivers/gpu/drm/vkms/vkms_drv.h
index d48c23d40ce5..64e62993b06f 100644
--- a/drivers/gpu/drm/vkms/vkms_drv.h
+++ b/drivers/gpu/drm/vkms/vkms_drv.h
@@ -28,7 +28,7 @@ struct vkms_writeback_job {
 struct vkms_composer {
struct drm_framebuffer fb;
struct drm_rect src, dst;
-   struct dma_buf_map map[4];
+   struct dma_buf_map map[DRM_FORMAT_MAX_PLANES];
unsigned int offset;
unsigned int pitch;
unsigned int cpp;
-- 
2.30.2



[PATCH 2/6] drm: vkms: Alloc the compose frame using vzalloc

2021-10-05 Thread Igor Matheus Andrade Torrente
Currently, the memory to the composition frame is being allocated using
the kzmalloc. This comes with the limitation of maximum size of one
page size(which in the x86_64 is 4Kb and 4MB for default and hugepage
respectively).

Somes test of igt (e.g. kms_plane@pixel-format) uses more than 4MB when
testing some pixel formats like ARGB16161616.

This problem is addessed by allocating the memory using kvzalloc that
circunvents this limitation.

Signed-off-by: Igor Matheus Andrade Torrente 
---
 drivers/gpu/drm/vkms/vkms_composer.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/vkms/vkms_composer.c 
b/drivers/gpu/drm/vkms/vkms_composer.c
index 9e8204be9a14..82f79e508f81 100644
--- a/drivers/gpu/drm/vkms/vkms_composer.c
+++ b/drivers/gpu/drm/vkms/vkms_composer.c
@@ -180,7 +180,7 @@ static int compose_active_planes(void **vaddr_out,
int i;
 
if (!*vaddr_out) {
-   *vaddr_out = kzalloc(gem_obj->size, GFP_KERNEL);
+   *vaddr_out = kvzalloc(gem_obj->size, GFP_KERNEL);
if (!*vaddr_out) {
DRM_ERROR("Cannot allocate memory for output frame.");
return -ENOMEM;
@@ -263,7 +263,7 @@ void vkms_composer_worker(struct work_struct *work)
crtc_state);
if (ret) {
if (ret == -EINVAL && !wb_pending)
-   kfree(vaddr_out);
+   kvfree(vaddr_out);
return;
}
 
@@ -275,7 +275,7 @@ void vkms_composer_worker(struct work_struct *work)
crtc_state->wb_pending = false;
spin_unlock_irq(&out->composer_lock);
} else {
-   kfree(vaddr_out);
+   kvfree(vaddr_out);
}
 
/*
-- 
2.30.2



[PATCH 1/6] drm: vkms: Replace the deprecated drm_mode_config_init

2021-10-05 Thread Igor Matheus Andrade Torrente
The `drm_mode_config_init` was deprecated since c3b790e commit, and it's
being replaced by the `drmm_mode_config_init`.

Signed-off-by: Igor Matheus Andrade Torrente 
---
 drivers/gpu/drm/vkms/vkms_drv.c | 5 -
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/vkms/vkms_drv.c b/drivers/gpu/drm/vkms/vkms_drv.c
index 0ffe5f0e33f7..828868920494 100644
--- a/drivers/gpu/drm/vkms/vkms_drv.c
+++ b/drivers/gpu/drm/vkms/vkms_drv.c
@@ -140,8 +140,11 @@ static const struct drm_mode_config_helper_funcs 
vkms_mode_config_helpers = {
 static int vkms_modeset_init(struct vkms_device *vkmsdev)
 {
struct drm_device *dev = &vkmsdev->drm;
+   int ret = drmm_mode_config_init(dev);
+
+   if (ret < 0)
+   return ret;
 
-   drm_mode_config_init(dev);
dev->mode_config.funcs = &vkms_mode_funcs;
dev->mode_config.min_width = XRES_MIN;
dev->mode_config.min_height = YRES_MIN;
-- 
2.30.2



[PATCH 0/6] Refactor the vkms to accept new formats

2021-10-05 Thread Igor Matheus Andrade Torrente
Summary
===
This series of patches refactor some vkms components in order to introduce
new formats to the planes and writeback connector.

Now in the blend function, the plane's pixels are converted to ARGB16161616
and then blended together.

The CRC is calculated based on the ARGB1616161616 buffer. And if required,
this buffer is copied/converted to the writeback buffer format.

And to handle the pixel conversion, new functions were added to convert
from a specific format to ARGB16161616 (the reciprocal is also true).

Tests
=
This patch series was tested using the following igt tests:
-t ".*kms_plane.*"
-t ".*kms_writeback.*"
-t ".*kms_cursor_crc*"

New tests passing
---
- pipe-A-cursor-size-change
- pipe-A-cursor-alpha-transparent

Tests and Performance Regressions
-
This pack of tests is failing:
- pipe-A-cursor-*-rapid-movement

This is expected since there are more operations per pixel than before.
And consequently, the compositing is way slower than before.

My micro-profiling shows these ranges to the completion of each
compositing in the .*kms_cursor_crc.* tests:

|  Frametime |
|:---:|::|
|  before |   after  |
| 8~20 ms | 32~56 ms |

Hence, further optimizations will be required.

Writeback test
---
During the development of this patch series, I discovered that the
writeback-check-output test wasn't filling the plane correctly.

So, currently, this patch series is failing in this test. But I sent a
patch to igt to fix it[1].

XRGB to ARGB behavior
=
During the development, I decided to always fill the alpha channel of
the output pixel whenever the conversion from a format without an alpha
channel to ARGB16161616 is necessary. Therefore, I ignore the value
received from the XRGB and overwrite the value with 0x.

My question is, is this behavior acceptable?

[1] https://lists.freedesktop.org/archives/igt-dev/2021-October/036125.html

Igor Matheus Andrade Torrente (6):
  drm: vkms: Replace the deprecated drm_mode_config_init
  drm: vkms: Alloc the compose frame using vzalloc
  drm: vkms: Replace hardcoded value of `vkms_composer.map` to
DRM_FORMAT_MAX_PLANES
  drm: vkms: Add fb information to `vkms_writeback_job`
  drm: vkms: Prepare `vkms_wb_encoder_atomic_check` to accept multiple
formats
  drm: vkms: Refactor the plane composer to accept new formats

 drivers/gpu/drm/vkms/vkms_composer.c  | 275 ++
 drivers/gpu/drm/vkms/vkms_drv.c   |   5 +-
 drivers/gpu/drm/vkms/vkms_drv.h   |  12 +-
 drivers/gpu/drm/vkms/vkms_formats.h   | 125 
 drivers/gpu/drm/vkms/vkms_writeback.c |  27 ++-
 5 files changed, 304 insertions(+), 140 deletions(-)
 create mode 100644 drivers/gpu/drm/vkms/vkms_formats.h

-- 
2.30.2



Re: VKMS: New plane formats

2021-09-02 Thread Igor Matheus Andrade Torrente

On 9/1/21 5:24 PM, Simon Ser wrote:

Ideally the final composition format would have enough precision for
all of the planes. I think it'd make sense to use ARGB16161616 if the
primary plane uses ARGB and an overlay plane uses ARGB16161616.

To simplify the code, maybe it's fine to always use ARGB16161616 for
the output, and add getters which fetch an ARGB16161616 row for each
supported plane format.



This makes sense to me. I will try to implement this way.

Thanks!


VKMS: New plane formats

2021-09-01 Thread Igor Matheus Andrade Torrente

Hi,

I'm working to add new plane formats to vkms. But I don't know what 
should be the behavior in the situation that we received multiple planes 
with different formats from the users-space.


For example, if the user chooses:
- DRM_FORMAT_ARGB16161616 to the primary plane
- DRM_FORMAT_ARGB to the cursor
- DRM_FORMAT_YUV42 to the overlay

What should be the output format that will be used to calculate the crc? 
DRM_FORMAT_ARGB16161616?


My idea was to convert all the planes to the primary, but I'm not sure 
if it is the right approach.


Best regards,
---
Igor M. A. Torrente


[PATCH v3] drm: Align a comment block

2020-03-20 Thread Igor Matheus Andrade Torrente
Fix a checkpatch warning caused by a misaligned comment block.

Signed-off-by: Igor Matheus Andrade Torrente 

---
Changes in v2:
- Change subject text

Changes in V3
- Fix a typo in the commit message

drivers/gpu/drm/drm_gem.c | 8 
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/drm_gem.c b/drivers/gpu/drm/drm_gem.c
index a9e4a610445a..564acc1f4030 100644
--- a/drivers/gpu/drm/drm_gem.c
+++ b/drivers/gpu/drm/drm_gem.c
@@ -222,10 +222,10 @@ drm_gem_object_handle_put_unlocked(struct drm_gem_object 
*obj)
return;
 
/*
-   * Must bump handle count first as this may be the last
-   * ref, in which case the object would disappear before we
-   * checked for a name
-   */
+* Must bump handle count first as this may be the last
+* ref, in which case the object would disappear before we
+* checked for a name
+*/
 
mutex_lock(&dev->object_name_lock);
if (--obj->handle_count == 0) {
-- 
2.20.1

___
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel


[PATCH v2] drm: Alligne a comment block

2020-03-20 Thread Igor Matheus Andrade Torrente
Fix a checkpatch warning caused by a misaligned comment block.

Changes in v2:
- Change subject text

Signed-off-by: Igor Matheus Andrade Torrente 
---
 drivers/gpu/drm/drm_gem.c | 8 
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/drm_gem.c b/drivers/gpu/drm/drm_gem.c
index a9e4a610445a..564acc1f4030 100644
--- a/drivers/gpu/drm/drm_gem.c
+++ b/drivers/gpu/drm/drm_gem.c
@@ -222,10 +222,10 @@ drm_gem_object_handle_put_unlocked(struct drm_gem_object 
*obj)
return;
 
/*
-   * Must bump handle count first as this may be the last
-   * ref, in which case the object would disappear before we
-   * checked for a name
-   */
+* Must bump handle count first as this may be the last
+* ref, in which case the object would disappear before we
+* checked for a name
+*/
 
mutex_lock(&dev->object_name_lock);
if (--obj->handle_count == 0) {
-- 
2.20.1

___
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel


[PATCH v2] drm: Correct a typo in a function comment

2020-03-17 Thread Igor Matheus Andrade Torrente
Replace "pionter" with "pointer" in the drm_gem_handle_create description.

Changes in v2:
- Change subject text

Signed-off-by: Igor Matheus Andrade Torrente 
---
 drivers/gpu/drm/drm_gem.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/drm_gem.c b/drivers/gpu/drm/drm_gem.c
index 6e960d57371e..c356379f5e97 100644
--- a/drivers/gpu/drm/drm_gem.c
+++ b/drivers/gpu/drm/drm_gem.c
@@ -432,7 +432,7 @@ drm_gem_handle_create_tail(struct drm_file *file_priv,
  * drm_gem_handle_create - create a gem handle for an object
  * @file_priv: drm file-private structure to register the handle for
  * @obj: object to register
- * @handlep: pionter to return the created handle to the caller
+ * @handlep: pointer to return the created handle to the caller
  *
  * Create a handle for this object. This adds a handle reference to the object,
  * which includes a regular reference count. Callers will likely want to
-- 
2.20.1

___
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel


[PATCH v2] drm: Alligne a comment block

2020-03-17 Thread Igor Matheus Andrade Torrente
Fix a checkpatch warning caused by a misaligned comment block.

Signed-off-by: Igor Matheus Andrade Torrente 
---
 drivers/gpu/drm/drm_gem.c | 8 
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/drm_gem.c b/drivers/gpu/drm/drm_gem.c
index 000fa4a1899f..6e960d57371e 100644
--- a/drivers/gpu/drm/drm_gem.c
+++ b/drivers/gpu/drm/drm_gem.c
@@ -222,10 +222,10 @@ drm_gem_object_handle_put_unlocked(struct drm_gem_object 
*obj)
return;
 
/*
-   * Must bump handle count first as this may be the last
-   * ref, in which case the object would disappear before we
-   * checked for a name
-   */
+* Must bump handle count first as this may be the last
+* ref, in which case the object would disappear before we
+* checked for a name
+*/
 
mutex_lock(&dev->object_name_lock);
if (--obj->handle_count == 0) {
-- 
2.20.1

___
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel


[PATCH] Staging: drm_gem: Fix a misaligned comment block

2020-03-17 Thread Igor Matheus Andrade Torrente
Fix a checkpatch warning caused by a misaligned comment block.

Signed-off-by: Igor Matheus Andrade Torrente 
---
 drivers/gpu/drm/drm_gem.c | 8 
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/drm_gem.c b/drivers/gpu/drm/drm_gem.c
index 000fa4a1899f..6e960d57371e 100644
--- a/drivers/gpu/drm/drm_gem.c
+++ b/drivers/gpu/drm/drm_gem.c
@@ -222,10 +222,10 @@ drm_gem_object_handle_put_unlocked(struct drm_gem_object 
*obj)
return;
 
/*
-   * Must bump handle count first as this may be the last
-   * ref, in which case the object would disappear before we
-   * checked for a name
-   */
+* Must bump handle count first as this may be the last
+* ref, in which case the object would disappear before we
+* checked for a name
+*/
 
mutex_lock(&dev->object_name_lock);
if (--obj->handle_count == 0) {
-- 
2.20.1

___
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel