Re: [PATCH v2 2/2] common/memsize.c: Fix get_ram_size() when cache is enabled

2023-05-30 Thread Michael Nazzareno Trimarchi
Hi

On Tue, May 30, 2023 at 3:49 PM Francesco Dolcini  wrote:
>
> On Tue, May 30, 2023 at 03:42:18PM +0200, Michael Nazzareno Trimarchi wrote:
> > On Tue, May 30, 2023 at 3:34 PM Francesco Dolcini  
> > wrote:
> > >
> > > From: Emanuele Ghidoli 
> > >
> > > Ensure that every write is flushed to memory and afterward reads are
> > > from memory.
> > > Since the algorithm rely on the fact that accessing to not existent
> > > memory lead to write at addr / 2 without this modification accesses
> > > to aliased (not physically present) addresses are cached and
> > > wrong size is returned.
> > >
> > > This was discovered while working on a TI AM625 based board
> > > where cache is normally enabled, see commit c02712a74849 ("arm: mach-k3: 
> > > Enable dcache in SPL").
> > >
> > > Signed-off-by: Emanuele Ghidoli 
> > > Signed-off-by: Francesco Dolcini 
> > > ---
> > > v2:
> > >  * check if CONFIG_SYS_CACHELINE_SIZE is defined
> > >  * do flush only when cache is enabled
> > > ---
> > >  common/memsize.c | 24 
> > >  1 file changed, 24 insertions(+)
> > >
> > > diff --git a/common/memsize.c b/common/memsize.c
> > > index 66d5be6a1ff3..d646df8b04cb 100644
> > > --- a/common/memsize.c
> > > +++ b/common/memsize.c
> > > @@ -7,9 +7,18 @@
> > >  #include 
> > >  #include 
> > >  #include 
> > > +#include 
> > > +#include 
> > >
> > >  DECLARE_GLOBAL_DATA_PTR;
> > >
> > > +#ifdef CONFIG_SYS_CACHELINE_SIZE
> > > +# define MEMSIZE_CACHELINE_SIZE CONFIG_SYS_CACHELINE_SIZE
> > > +#else
> > > +/* Just use the greatest cache flush alignment requirement I'm aware of 
> > > */
> > > +# define MEMSIZE_CACHELINE_SIZE 128
> > > +#endif
> > > +
> > >  #ifdef __PPC__
> > >  /*
> > >   * At least on G2 PowerPC cores, sequential accesses to non-existent
> > > @@ -20,6 +29,15 @@ DECLARE_GLOBAL_DATA_PTR;
> > >  # define sync()/* nothing */
> > >  #endif
> > >
> > > +static void dcache_flush_invalidate(volatile long *p)
> > > +{
> > > +   uintptr_t start, stop;
> > > +   start = ALIGN_DOWN((uintptr_t)p, MEMSIZE_CACHELINE_SIZE);
> > > +   stop = start + MEMSIZE_CACHELINE_SIZE;
> > > +   flush_dcache_range(start, stop);
> > > +   invalidate_dcache_range(start, stop);
> > > +}
> > > +
> > >  /*
> > >   * Check memory range for valid RAM. A simple memory test determines
> > >   * the actually available RAM size between addresses `base' and
> > > @@ -34,6 +52,7 @@ long get_ram_size(long *base, long maxsize)
> > > long   val;
> > > long   size;
> > > inti = 0;
> > > +   intdcache_en = dcache_status();
> > >
> > > for (cnt = (maxsize / sizeof(long)) >> 1; cnt > 0; cnt >>= 1) {
> > > addr = base + cnt;  /* pointer arith! */
> > > @@ -41,6 +60,8 @@ long get_ram_size(long *base, long maxsize)
> > > save[i++] = *addr;
> > > sync();
> > > *addr = ~cnt;
> > > +   if (dcache_en)
> > > +   dcache_flush_invalidate(addr);
> >
> > Why this should be done on every increment if the invalidate keep a
> > range of address?
>
> We do invalidate/flush the current write, the granularity of the
> flush/invalidate is the page size.
>
> This is required since we need to ensure ordering of the writes. How
> would you know where the aliasing is going to happen if you flush all at
> once at the end?
>

I see, I read the code properly of get_mem_size now.

> > Can be possible just to enable/disable cache around memory test?
> In theory yes. In practice this proved some architecture to just crash
> badly because the stack was "corrupted" after re-enabling the cache.
>
> We'll submit a separate bug fix for that, bug given that this pattern
> (disable AND enable) is normally not done in U-Boot it seems like
> looking for trouble doing it in such a commonly used routine.
>

Thank you


Re: [PATCH v2 2/2] common/memsize.c: Fix get_ram_size() when cache is enabled

2023-05-30 Thread Francesco Dolcini
On Tue, May 30, 2023 at 03:42:18PM +0200, Michael Nazzareno Trimarchi wrote:
> On Tue, May 30, 2023 at 3:34 PM Francesco Dolcini  
> wrote:
> >
> > From: Emanuele Ghidoli 
> >
> > Ensure that every write is flushed to memory and afterward reads are
> > from memory.
> > Since the algorithm rely on the fact that accessing to not existent
> > memory lead to write at addr / 2 without this modification accesses
> > to aliased (not physically present) addresses are cached and
> > wrong size is returned.
> >
> > This was discovered while working on a TI AM625 based board
> > where cache is normally enabled, see commit c02712a74849 ("arm: mach-k3: 
> > Enable dcache in SPL").
> >
> > Signed-off-by: Emanuele Ghidoli 
> > Signed-off-by: Francesco Dolcini 
> > ---
> > v2:
> >  * check if CONFIG_SYS_CACHELINE_SIZE is defined
> >  * do flush only when cache is enabled
> > ---
> >  common/memsize.c | 24 
> >  1 file changed, 24 insertions(+)
> >
> > diff --git a/common/memsize.c b/common/memsize.c
> > index 66d5be6a1ff3..d646df8b04cb 100644
> > --- a/common/memsize.c
> > +++ b/common/memsize.c
> > @@ -7,9 +7,18 @@
> >  #include 
> >  #include 
> >  #include 
> > +#include 
> > +#include 
> >
> >  DECLARE_GLOBAL_DATA_PTR;
> >
> > +#ifdef CONFIG_SYS_CACHELINE_SIZE
> > +# define MEMSIZE_CACHELINE_SIZE CONFIG_SYS_CACHELINE_SIZE
> > +#else
> > +/* Just use the greatest cache flush alignment requirement I'm aware of */
> > +# define MEMSIZE_CACHELINE_SIZE 128
> > +#endif
> > +
> >  #ifdef __PPC__
> >  /*
> >   * At least on G2 PowerPC cores, sequential accesses to non-existent
> > @@ -20,6 +29,15 @@ DECLARE_GLOBAL_DATA_PTR;
> >  # define sync()/* nothing */
> >  #endif
> >
> > +static void dcache_flush_invalidate(volatile long *p)
> > +{
> > +   uintptr_t start, stop;
> > +   start = ALIGN_DOWN((uintptr_t)p, MEMSIZE_CACHELINE_SIZE);
> > +   stop = start + MEMSIZE_CACHELINE_SIZE;
> > +   flush_dcache_range(start, stop);
> > +   invalidate_dcache_range(start, stop);
> > +}
> > +
> >  /*
> >   * Check memory range for valid RAM. A simple memory test determines
> >   * the actually available RAM size between addresses `base' and
> > @@ -34,6 +52,7 @@ long get_ram_size(long *base, long maxsize)
> > long   val;
> > long   size;
> > inti = 0;
> > +   intdcache_en = dcache_status();
> >
> > for (cnt = (maxsize / sizeof(long)) >> 1; cnt > 0; cnt >>= 1) {
> > addr = base + cnt;  /* pointer arith! */
> > @@ -41,6 +60,8 @@ long get_ram_size(long *base, long maxsize)
> > save[i++] = *addr;
> > sync();
> > *addr = ~cnt;
> > +   if (dcache_en)
> > +   dcache_flush_invalidate(addr);
> 
> Why this should be done on every increment if the invalidate keep a
> range of address?

We do invalidate/flush the current write, the granularity of the
flush/invalidate is the page size.

This is required since we need to ensure ordering of the writes. How
would you know where the aliasing is going to happen if you flush all at
once at the end?

> Can be possible just to enable/disable cache around memory test?
In theory yes. In practice this proved some architecture to just crash
badly because the stack was "corrupted" after re-enabling the cache.

We'll submit a separate bug fix for that, bug given that this pattern
(disable AND enable) is normally not done in U-Boot it seems like
looking for trouble doing it in such a commonly used routine.

Francesco






Re: [PATCH v2 2/2] common/memsize.c: Fix get_ram_size() when cache is enabled

2023-05-30 Thread Michael Nazzareno Trimarchi
Hi

Few questions

On Tue, May 30, 2023 at 3:34 PM Francesco Dolcini  wrote:
>
> From: Emanuele Ghidoli 
>
> Ensure that every write is flushed to memory and afterward reads are
> from memory.
> Since the algorithm rely on the fact that accessing to not existent
> memory lead to write at addr / 2 without this modification accesses
> to aliased (not physically present) addresses are cached and
> wrong size is returned.
>
> This was discovered while working on a TI AM625 based board
> where cache is normally enabled, see commit c02712a74849 ("arm: mach-k3: 
> Enable dcache in SPL").
>
> Signed-off-by: Emanuele Ghidoli 
> Signed-off-by: Francesco Dolcini 
> ---
> v2:
>  * check if CONFIG_SYS_CACHELINE_SIZE is defined
>  * do flush only when cache is enabled
> ---
>  common/memsize.c | 24 
>  1 file changed, 24 insertions(+)
>
> diff --git a/common/memsize.c b/common/memsize.c
> index 66d5be6a1ff3..d646df8b04cb 100644
> --- a/common/memsize.c
> +++ b/common/memsize.c
> @@ -7,9 +7,18 @@
>  #include 
>  #include 
>  #include 
> +#include 
> +#include 
>
>  DECLARE_GLOBAL_DATA_PTR;
>
> +#ifdef CONFIG_SYS_CACHELINE_SIZE
> +# define MEMSIZE_CACHELINE_SIZE CONFIG_SYS_CACHELINE_SIZE
> +#else
> +/* Just use the greatest cache flush alignment requirement I'm aware of */
> +# define MEMSIZE_CACHELINE_SIZE 128
> +#endif
> +
>  #ifdef __PPC__
>  /*
>   * At least on G2 PowerPC cores, sequential accesses to non-existent
> @@ -20,6 +29,15 @@ DECLARE_GLOBAL_DATA_PTR;
>  # define sync()/* nothing */
>  #endif
>
> +static void dcache_flush_invalidate(volatile long *p)
> +{
> +   uintptr_t start, stop;
> +   start = ALIGN_DOWN((uintptr_t)p, MEMSIZE_CACHELINE_SIZE);
> +   stop = start + MEMSIZE_CACHELINE_SIZE;
> +   flush_dcache_range(start, stop);
> +   invalidate_dcache_range(start, stop);
> +}
> +
>  /*
>   * Check memory range for valid RAM. A simple memory test determines
>   * the actually available RAM size between addresses `base' and
> @@ -34,6 +52,7 @@ long get_ram_size(long *base, long maxsize)
> long   val;
> long   size;
> inti = 0;
> +   intdcache_en = dcache_status();
>
> for (cnt = (maxsize / sizeof(long)) >> 1; cnt > 0; cnt >>= 1) {
> addr = base + cnt;  /* pointer arith! */
> @@ -41,6 +60,8 @@ long get_ram_size(long *base, long maxsize)
> save[i++] = *addr;
> sync();
> *addr = ~cnt;
> +   if (dcache_en)
> +   dcache_flush_invalidate(addr);

Why this should be done on every increment if the invalidate keep a
range of address?
> }
>
> addr = base;
> @@ -50,6 +71,9 @@ long get_ram_size(long *base, long maxsize)
> *addr = 0;
>
> sync();
> +   if (dcache_en)
> +   dcache_flush_invalidate(addr);
> +
> if ((val = *addr) != 0) {
> /* Restore the original data before leaving the function. */
> sync();

Can be possible just to enable/disable cache around memory test?

Michael
> --
> 2.25.1
>


-- 
Michael Nazzareno Trimarchi
Co-Founder & Chief Executive Officer
M. +39 347 913 2170
mich...@amarulasolutions.com
__

Amarula Solutions BV
Joop Geesinkweg 125, 1114 AB, Amsterdam, NL
T. +31 (0)85 111 9172
i...@amarulasolutions.com
www.amarulasolutions.com


[PATCH v2 2/2] common/memsize.c: Fix get_ram_size() when cache is enabled

2023-05-30 Thread Francesco Dolcini
From: Emanuele Ghidoli 

Ensure that every write is flushed to memory and afterward reads are
from memory.
Since the algorithm rely on the fact that accessing to not existent
memory lead to write at addr / 2 without this modification accesses
to aliased (not physically present) addresses are cached and
wrong size is returned.

This was discovered while working on a TI AM625 based board
where cache is normally enabled, see commit c02712a74849 ("arm: mach-k3: Enable 
dcache in SPL").

Signed-off-by: Emanuele Ghidoli 
Signed-off-by: Francesco Dolcini 
---
v2:
 * check if CONFIG_SYS_CACHELINE_SIZE is defined
 * do flush only when cache is enabled
---
 common/memsize.c | 24 
 1 file changed, 24 insertions(+)

diff --git a/common/memsize.c b/common/memsize.c
index 66d5be6a1ff3..d646df8b04cb 100644
--- a/common/memsize.c
+++ b/common/memsize.c
@@ -7,9 +7,18 @@
 #include 
 #include 
 #include 
+#include 
+#include 
 
 DECLARE_GLOBAL_DATA_PTR;
 
+#ifdef CONFIG_SYS_CACHELINE_SIZE
+# define MEMSIZE_CACHELINE_SIZE CONFIG_SYS_CACHELINE_SIZE
+#else
+/* Just use the greatest cache flush alignment requirement I'm aware of */
+# define MEMSIZE_CACHELINE_SIZE 128
+#endif
+
 #ifdef __PPC__
 /*
  * At least on G2 PowerPC cores, sequential accesses to non-existent
@@ -20,6 +29,15 @@ DECLARE_GLOBAL_DATA_PTR;
 # define sync()/* nothing */
 #endif
 
+static void dcache_flush_invalidate(volatile long *p)
+{
+   uintptr_t start, stop;
+   start = ALIGN_DOWN((uintptr_t)p, MEMSIZE_CACHELINE_SIZE);
+   stop = start + MEMSIZE_CACHELINE_SIZE;
+   flush_dcache_range(start, stop);
+   invalidate_dcache_range(start, stop);
+}
+
 /*
  * Check memory range for valid RAM. A simple memory test determines
  * the actually available RAM size between addresses `base' and
@@ -34,6 +52,7 @@ long get_ram_size(long *base, long maxsize)
long   val;
long   size;
inti = 0;
+   intdcache_en = dcache_status();
 
for (cnt = (maxsize / sizeof(long)) >> 1; cnt > 0; cnt >>= 1) {
addr = base + cnt;  /* pointer arith! */
@@ -41,6 +60,8 @@ long get_ram_size(long *base, long maxsize)
save[i++] = *addr;
sync();
*addr = ~cnt;
+   if (dcache_en)
+   dcache_flush_invalidate(addr);
}
 
addr = base;
@@ -50,6 +71,9 @@ long get_ram_size(long *base, long maxsize)
*addr = 0;
 
sync();
+   if (dcache_en)
+   dcache_flush_invalidate(addr);
+
if ((val = *addr) != 0) {
/* Restore the original data before leaving the function. */
sync();
-- 
2.25.1