We found a regression in v5.10 on real-time server, using the
rt-kernel and the mgag200 driver. It's some really specialized
workload, with <10us latency expectation on isolated core.
After the v5.10, the real time tasks missed their <10us latency
when something prints on the screen (fbcon or printk)

The regression has been bisected to 2 commits:
commit 0b34d58b6c32 ("drm/mgag200: Enable caching for SHMEM pages")
commit 4862ffaec523 ("drm/mgag200: Move vmap out of commit tail")

The first one changed the system memory framebuffer from Write-Combine
to the default caching.
Before the second commit, the mgag200 driver used to unmap the
framebuffer after each frame, which implicitly does a cache flush.
Both regressions are fixed by this commit, which restore WC mapping
for the framebuffer in system memory, and add a cache flush.
This is only needed on x86_64, for low-latency workload,
so the new kconfig DRM_MGAG200_LATENCY_WORKAROUND depends on
PREEMPT_RT and X86.

For more context, the whole thread can be found here:
https://lore.kernel.org/dri-devel/20231019135655.313759-1-jfale...@redhat.com/

Signed-off-by: Jocelyn Falempe <jfale...@redhat.com>
---
 drivers/gpu/drm/mgag200/Kconfig        | 10 ++++++++++
 drivers/gpu/drm/mgag200/mgag200_drv.c  | 17 +++++++++++++++++
 drivers/gpu/drm/mgag200/mgag200_mode.c |  8 ++++++++
 3 files changed, 35 insertions(+)

diff --git a/drivers/gpu/drm/mgag200/Kconfig b/drivers/gpu/drm/mgag200/Kconfig
index b28c5e4828f4..73e4feba743a 100644
--- a/drivers/gpu/drm/mgag200/Kconfig
+++ b/drivers/gpu/drm/mgag200/Kconfig
@@ -11,3 +11,13 @@ config DRM_MGAG200
         MGA G200 desktop chips and the server variants. It requires 0.3.0
         of the modesetting userspace driver, and a version of mga driver
         that will fail on KMS enabled devices.
+
+config DRM_MGAG200_LATENCY_WORKAROUND
+       bool "Enable workaround for low latency server"
+       depends on DRM_MGAG200 && PREEMPT_RT && X86
+       help
+         Enable a workaround to have better latency with mgag200 driver.
+         It restores the <v5.10 behavior, by mapping the framebuffer in system
+         RAM as Write-Combining, and flushing the cache after each write.
+         This is only needed on x86_64 and if you want low-latency.
+         If unsure, say N.
diff --git a/drivers/gpu/drm/mgag200/mgag200_drv.c 
b/drivers/gpu/drm/mgag200/mgag200_drv.c
index 54fce00e2136..3fdef8b580cc 100644
--- a/drivers/gpu/drm/mgag200/mgag200_drv.c
+++ b/drivers/gpu/drm/mgag200/mgag200_drv.c
@@ -84,6 +84,20 @@ resource_size_t mgag200_probe_vram(void __iomem *mem, 
resource_size_t size)
        return offset - 65536;
 }
 
+#if defined(CONFIG_DRM_MGAG200_LATENCY_WORKAROUND)
+static struct drm_gem_object *mgag200_create_object(struct drm_device *dev, 
size_t size)
+{
+       struct drm_gem_shmem_object *shmem;
+
+       shmem = kzalloc(sizeof(*shmem), GFP_KERNEL);
+       if (!shmem)
+               return NULL;
+
+       shmem->map_wc = true;
+       return &shmem->base;
+}
+#endif
+
 /*
  * DRM driver
  */
@@ -99,6 +113,9 @@ static const struct drm_driver mgag200_driver = {
        .major = DRIVER_MAJOR,
        .minor = DRIVER_MINOR,
        .patchlevel = DRIVER_PATCHLEVEL,
+#if defined(CONFIG_DRM_MGAG200_LATENCY_WORKAROUND)
+       .gem_create_object = mgag200_create_object,
+#endif
        DRM_GEM_SHMEM_DRIVER_OPS,
 };
 
diff --git a/drivers/gpu/drm/mgag200/mgag200_mode.c 
b/drivers/gpu/drm/mgag200/mgag200_mode.c
index 0eb769dd76ce..34ef9fb6e96c 100644
--- a/drivers/gpu/drm/mgag200/mgag200_mode.c
+++ b/drivers/gpu/drm/mgag200/mgag200_mode.c
@@ -13,6 +13,7 @@
 
 #include <drm/drm_atomic.h>
 #include <drm/drm_atomic_helper.h>
+#include <drm/drm_cache.h>
 #include <drm/drm_damage_helper.h>
 #include <drm/drm_edid.h>
 #include <drm/drm_format_helper.h>
@@ -436,6 +437,13 @@ static void mgag200_handle_damage(struct mga_device *mdev, 
const struct iosys_ma
 
        iosys_map_incr(&dst, drm_fb_clip_offset(fb->pitches[0], fb->format, 
clip));
        drm_fb_memcpy(&dst, fb->pitches, vmap, fb, clip);
+
+       /* Flushing the cache greatly improves latency on x86_64 */
+#if defined(CONFIG_DRM_MGAG200_LATENCY_WORKAROUND)
+       if (!vmap->is_iomem)
+               drm_clflush_virt_range(vmap->vaddr + clip->y1 * fb->pitches[0],
+                                      drm_rect_height(clip) * fb->pitches[0]);
+#endif
 }
 
 /*

base-commit: 1f36d634670d8001a45fe2f2dcae546819f9c7d8
-- 
2.43.0

Reply via email to