Re: [PATCH v3 6/7] video: Only dcache flush damaged lines

2023-01-06 Thread Simon Glass
Hi,

On Tue, 3 Jan 2023 at 13:25, Alexander Graf  wrote:
>
>
> On 30.12.22 22:12, Heinrich Schuchardt wrote:
> > On 12/30/22 20:58, Alexander Graf wrote:
> >> Now that we have a damage area tells us which parts of the frame buffer
> >> actually need updating, let's only dcache flush those on video_sync()
> >> calls. With this optimization in place, frame buffer updates -
> >> especially
> >> on large screen such as 4k displays - speed up significantly.
> >>
> >> Signed-off-by: Alexander Graf 
> >> Reported-by: Da Xue 
> >>
> >> ---
> >>
> >> v1 -> v2:
> >>
> >>- Fix dcache range; we were flushing too much before
> >>- Remove ifdefs
> >> ---
> >>   drivers/video/video-uclass.c | 51 ++--
> >>   1 file changed, 43 insertions(+), 8 deletions(-)
> >>
> >> diff --git a/drivers/video/video-uclass.c b/drivers/video/video-uclass.c
> >> index f1fbeaadcb..bf93f524c7 100644
> >> --- a/drivers/video/video-uclass.c
> >> +++ b/drivers/video/video-uclass.c
> >> @@ -297,9 +297,45 @@ int video_damage(struct udevice *vid, int x, int
> >> y, int width, int height)
> >>   return 0;
> >>   }
> >>
> >> +#if defined(CONFIG_ARM) && !CONFIG_IS_ENABLED(SYS_DCACHE_OFF)
> >
> > ARM isn't the only architecture implementing flush_dcache_range().
> > So this condition needs to be fixed as well as the one in video_sync.
> >
> > Why don't you simply rely on priv->flash_dcache irrespective of the
> > architecture? Let the video drivers decide if they need it.
>
>
> This is code that already was #ifdef'ed before. The way I understand its
> history, we couldn't decisively ensure that flush_dcache_range() exists,
> so we couldn't build the code always.
>
> I agree that it looks weird and error prone though. I believe you're
> trying to say we should just make sure there's always a
> flush_dcache_range() function and keep the rest to the compiler?
>
Can  / should we use the dma_map stuff here?

Regards,
Simon


Re: [PATCH v3 6/7] video: Only dcache flush damaged lines

2023-01-03 Thread Alexander Graf



On 30.12.22 22:12, Heinrich Schuchardt wrote:

On 12/30/22 20:58, Alexander Graf wrote:

Now that we have a damage area tells us which parts of the frame buffer
actually need updating, let's only dcache flush those on video_sync()
calls. With this optimization in place, frame buffer updates - 
especially

on large screen such as 4k displays - speed up significantly.

Signed-off-by: Alexander Graf 
Reported-by: Da Xue 

---

v1 -> v2:

   - Fix dcache range; we were flushing too much before
   - Remove ifdefs
---
  drivers/video/video-uclass.c | 51 ++--
  1 file changed, 43 insertions(+), 8 deletions(-)

diff --git a/drivers/video/video-uclass.c b/drivers/video/video-uclass.c
index f1fbeaadcb..bf93f524c7 100644
--- a/drivers/video/video-uclass.c
+++ b/drivers/video/video-uclass.c
@@ -297,9 +297,45 @@ int video_damage(struct udevice *vid, int x, int 
y, int width, int height)

  return 0;
  }

+#if defined(CONFIG_ARM) && !CONFIG_IS_ENABLED(SYS_DCACHE_OFF)


ARM isn't the only architecture implementing flush_dcache_range().
So this condition needs to be fixed as well as the one in video_sync.

Why don't you simply rely on priv->flash_dcache irrespective of the
architecture? Let the video drivers decide if they need it.



This is code that already was #ifdef'ed before. The way I understand its 
history, we couldn't decisively ensure that flush_dcache_range() exists, 
so we couldn't build the code always.


I agree that it looks weird and error prone though. I believe you're 
trying to say we should just make sure there's always a 
flush_dcache_range() function and keep the rest to the compiler?




Alex




Re: [PATCH v3 6/7] video: Only dcache flush damaged lines

2022-12-30 Thread Heinrich Schuchardt

On 12/30/22 20:58, Alexander Graf wrote:

Now that we have a damage area tells us which parts of the frame buffer
actually need updating, let's only dcache flush those on video_sync()
calls. With this optimization in place, frame buffer updates - especially
on large screen such as 4k displays - speed up significantly.

Signed-off-by: Alexander Graf 
Reported-by: Da Xue 

---

v1 -> v2:

   - Fix dcache range; we were flushing too much before
   - Remove ifdefs
---
  drivers/video/video-uclass.c | 51 ++--
  1 file changed, 43 insertions(+), 8 deletions(-)

diff --git a/drivers/video/video-uclass.c b/drivers/video/video-uclass.c
index f1fbeaadcb..bf93f524c7 100644
--- a/drivers/video/video-uclass.c
+++ b/drivers/video/video-uclass.c
@@ -297,9 +297,45 @@ int video_damage(struct udevice *vid, int x, int y, int 
width, int height)
return 0;
  }

+#if defined(CONFIG_ARM) && !CONFIG_IS_ENABLED(SYS_DCACHE_OFF)


ARM isn't the only architecture implementing flush_dcache_range().
So this condition needs to be fixed as well as the one in video_sync.

Why don't you simply rely on priv->flash_dcache irrespective of the
architecture? Let the video drivers decide if they need it.

Best regards

Heinrich


+static void video_flush_dcache(struct udevice *vid)
+{
+   struct video_priv *priv = dev_get_uclass_priv(vid);
+
+   if (!priv->flush_dcache)
+   return;
+
+   if (!CONFIG_IS_ENABLED(VIDEO_DAMAGE)) {
+   flush_dcache_range((ulong)priv->fb,
+  ALIGN((ulong)priv->fb + priv->fb_size,
+CONFIG_SYS_CACHELINE_SIZE));
+
+   return;
+   }
+
+   if (priv->damage.endx && priv->damage.endy) {
+   int lstart = priv->damage.x * VNBYTES(priv->bpix);
+   int lend = priv->damage.endx * VNBYTES(priv->bpix);
+   int y;
+
+   for (y = priv->damage.y; y < priv->damage.endy; y++) {
+   ulong fb = (ulong)priv->fb;
+   ulong start = fb + (y * priv->line_length) + lstart;
+   ulong end = start + lend - lstart;
+
+   start = ALIGN_DOWN(start, CONFIG_SYS_CACHELINE_SIZE);
+   end = ALIGN(end, CONFIG_SYS_CACHELINE_SIZE);
+
+   flush_dcache_range(start, end);
+   }
+   }
+}
+#endif
+
  /* Flush video activity to the caches */
  int video_sync(struct udevice *vid, bool force)
  {
+   struct video_priv *priv = dev_get_uclass_priv(vid);
struct video_ops *ops = video_get_ops(vid);
int ret;

@@ -315,15 +351,8 @@ int video_sync(struct udevice *vid, bool force)
 * out whether it exists? For now, ARM is safe.
 */
  #if defined(CONFIG_ARM) && !CONFIG_IS_ENABLED(SYS_DCACHE_OFF)
-   struct video_priv *priv = dev_get_uclass_priv(vid);
-
-   if (priv->flush_dcache) {
-   flush_dcache_range((ulong)priv->fb,
-  ALIGN((ulong)priv->fb + priv->fb_size,
-CONFIG_SYS_CACHELINE_SIZE));
-   }
+   video_flush_dcache(vid);
  #elif defined(CONFIG_VIDEO_SANDBOX_SDL)
-   struct video_priv *priv = dev_get_uclass_priv(vid);
static ulong last_sync;

if (force || get_timer(last_sync) > 100) {
@@ -331,6 +360,12 @@ int video_sync(struct udevice *vid, bool force)
last_sync = get_timer(0);
}
  #endif
+
+   if (CONFIG_IS_ENABLED(VIDEO_DAMAGE)) {
+   priv->damage.endx = 0;
+   priv->damage.endy = 0;
+   }
+
return 0;
  }





Re: [PATCH v3 6/7] video: Only dcache flush damaged lines

2022-12-30 Thread Heinrich Schuchardt

On 12/30/22 20:58, Alexander Graf wrote:

Now that we have a damage area tells us which parts of the frame buffer
actually need updating, let's only dcache flush those on video_sync()
calls. With this optimization in place, frame buffer updates - especially
on large screen such as 4k displays - speed up significantly.

Signed-off-by: Alexander Graf 
Reported-by: Da Xue 

---

v1 -> v2:

   - Fix dcache range; we were flushing too much before
   - Remove ifdefs
---
  drivers/video/video-uclass.c | 51 ++--
  1 file changed, 43 insertions(+), 8 deletions(-)

diff --git a/drivers/video/video-uclass.c b/drivers/video/video-uclass.c
index f1fbeaadcb..bf93f524c7 100644
--- a/drivers/video/video-uclass.c
+++ b/drivers/video/video-uclass.c
@@ -297,9 +297,45 @@ int video_damage(struct udevice *vid, int x, int y, int 
width, int height)
return 0;
  }

+#if defined(CONFIG_ARM) && !CONFIG_IS_ENABLED(SYS_DCACHE_OFF)
+static void video_flush_dcache(struct udevice *vid)
+{
+   struct video_priv *priv = dev_get_uclass_priv(vid);
+
+   if (!priv->flush_dcache)
+   return;
+
+   if (!CONFIG_IS_ENABLED(VIDEO_DAMAGE)) {
+   flush_dcache_range((ulong)priv->fb,
+  ALIGN((ulong)priv->fb + priv->fb_size,
+CONFIG_SYS_CACHELINE_SIZE));
+
+   return;
+   }
+
+   if (priv->damage.endx && priv->damage.endy) {
+   int lstart = priv->damage.x * VNBYTES(priv->bpix);
+   int lend = priv->damage.endx * VNBYTES(priv->bpix);
+   int y;
+
+   for (y = priv->damage.y; y < priv->damage.endy; y++) {
+   ulong fb = (ulong)priv->fb;
+   ulong start = fb + (y * priv->line_length) + lstart;
+   ulong end = start + lend - lstart;
+
+   start = ALIGN_DOWN(start, CONFIG_SYS_CACHELINE_SIZE);
+   end = ALIGN(end, CONFIG_SYS_CACHELINE_SIZE);
+
+   flush_dcache_range(start, end);
+   }
+   }
+}
+#endif
+
  /* Flush video activity to the caches */
  int video_sync(struct udevice *vid, bool force)
  {
+   struct video_priv *priv = dev_get_uclass_priv(vid);
struct video_ops *ops = video_get_ops(vid);
int ret;

@@ -315,15 +351,8 @@ int video_sync(struct udevice *vid, bool force)
 * out whether it exists? For now, ARM is safe.
 */
  #if defined(CONFIG_ARM) && !CONFIG_IS_ENABLED(SYS_DCACHE_OFF)
-   struct video_priv *priv = dev_get_uclass_priv(vid);
-
-   if (priv->flush_dcache) {
-   flush_dcache_range((ulong)priv->fb,
-  ALIGN((ulong)priv->fb + priv->fb_size,
-CONFIG_SYS_CACHELINE_SIZE));
-   }
+   video_flush_dcache(vid);
  #elif defined(CONFIG_VIDEO_SANDBOX_SDL)
-   struct video_priv *priv = dev_get_uclass_priv(vid);
static ulong last_sync;

if (force || get_timer(last_sync) > 100) {
@@ -331,6 +360,12 @@ int video_sync(struct udevice *vid, bool force)
last_sync = get_timer(0);
}
  #endif
+
+   if (CONFIG_IS_ENABLED(VIDEO_DAMAGE)) {
+   priv->damage.endx = 0;
+   priv->damage.endy = 0;


priv->damage.x = priv->xsize;
priv->damage.y = priv->ysize;

With this you can avoid differentiating between first damage and later
damage in video_damage().

Best regards

Heinrich


+   }
+
return 0;
  }





[PATCH v3 6/7] video: Only dcache flush damaged lines

2022-12-30 Thread Alexander Graf
Now that we have a damage area tells us which parts of the frame buffer
actually need updating, let's only dcache flush those on video_sync()
calls. With this optimization in place, frame buffer updates - especially
on large screen such as 4k displays - speed up significantly.

Signed-off-by: Alexander Graf 
Reported-by: Da Xue 

---

v1 -> v2:

  - Fix dcache range; we were flushing too much before
  - Remove ifdefs
---
 drivers/video/video-uclass.c | 51 ++--
 1 file changed, 43 insertions(+), 8 deletions(-)

diff --git a/drivers/video/video-uclass.c b/drivers/video/video-uclass.c
index f1fbeaadcb..bf93f524c7 100644
--- a/drivers/video/video-uclass.c
+++ b/drivers/video/video-uclass.c
@@ -297,9 +297,45 @@ int video_damage(struct udevice *vid, int x, int y, int 
width, int height)
return 0;
 }
 
+#if defined(CONFIG_ARM) && !CONFIG_IS_ENABLED(SYS_DCACHE_OFF)
+static void video_flush_dcache(struct udevice *vid)
+{
+   struct video_priv *priv = dev_get_uclass_priv(vid);
+
+   if (!priv->flush_dcache)
+   return;
+
+   if (!CONFIG_IS_ENABLED(VIDEO_DAMAGE)) {
+   flush_dcache_range((ulong)priv->fb,
+  ALIGN((ulong)priv->fb + priv->fb_size,
+CONFIG_SYS_CACHELINE_SIZE));
+
+   return;
+   }
+
+   if (priv->damage.endx && priv->damage.endy) {
+   int lstart = priv->damage.x * VNBYTES(priv->bpix);
+   int lend = priv->damage.endx * VNBYTES(priv->bpix);
+   int y;
+
+   for (y = priv->damage.y; y < priv->damage.endy; y++) {
+   ulong fb = (ulong)priv->fb;
+   ulong start = fb + (y * priv->line_length) + lstart;
+   ulong end = start + lend - lstart;
+
+   start = ALIGN_DOWN(start, CONFIG_SYS_CACHELINE_SIZE);
+   end = ALIGN(end, CONFIG_SYS_CACHELINE_SIZE);
+
+   flush_dcache_range(start, end);
+   }
+   }
+}
+#endif
+
 /* Flush video activity to the caches */
 int video_sync(struct udevice *vid, bool force)
 {
+   struct video_priv *priv = dev_get_uclass_priv(vid);
struct video_ops *ops = video_get_ops(vid);
int ret;
 
@@ -315,15 +351,8 @@ int video_sync(struct udevice *vid, bool force)
 * out whether it exists? For now, ARM is safe.
 */
 #if defined(CONFIG_ARM) && !CONFIG_IS_ENABLED(SYS_DCACHE_OFF)
-   struct video_priv *priv = dev_get_uclass_priv(vid);
-
-   if (priv->flush_dcache) {
-   flush_dcache_range((ulong)priv->fb,
-  ALIGN((ulong)priv->fb + priv->fb_size,
-CONFIG_SYS_CACHELINE_SIZE));
-   }
+   video_flush_dcache(vid);
 #elif defined(CONFIG_VIDEO_SANDBOX_SDL)
-   struct video_priv *priv = dev_get_uclass_priv(vid);
static ulong last_sync;
 
if (force || get_timer(last_sync) > 100) {
@@ -331,6 +360,12 @@ int video_sync(struct udevice *vid, bool force)
last_sync = get_timer(0);
}
 #endif
+
+   if (CONFIG_IS_ENABLED(VIDEO_DAMAGE)) {
+   priv->damage.endx = 0;
+   priv->damage.endy = 0;
+   }
+
return 0;
 }
 
-- 
2.37.1 (Apple Git-137.1)