Re: [PATCH v5 27/33] riscv: Fix race conditions when initializing IPI

Sean Anderson Mon, 02 Mar 2020 15:43:55 -0800

On 3/2/20 6:17 PM, Lukas Auer wrote:
> On Fri, 2020-02-28 at 16:05 -0500, Sean Anderson wrote:
> 
>> The IPI code could have race conditions in several places.
>> * Several harts could race on the value of gd->arch->clint/plic
>> * Non-boot harts could race with the main hart on the DM subsystem In
>>   addition, if an IPI was pending when U-Boot started, it would cause the
>>   IPI handler to jump to address 0.
>>
>> To address these problems, a new function riscv_init_ipi is introduced. It
>> is called once during arch_cpu_init_dm. Before this point, no riscv_*_ipi
>> functions may be called. Access is synchronized by gd->arch->ipi_ready.
>>
>> Signed-off-by: Sean Anderson <sean...@gmail.com>
>> ---
>>
>> Changes in v5:
>> - New
>>
>>  arch/riscv/cpu/cpu.c                 |  9 ++++
>>  arch/riscv/include/asm/global_data.h |  1 +
>>  arch/riscv/include/asm/smp.h         | 43 ++++++++++++++++++
>>  arch/riscv/lib/andes_plic.c          | 34 +++++---------
>>  arch/riscv/lib/sbi_ipi.c             |  5 ++
>>  arch/riscv/lib/sifive_clint.c        | 33 +++++---------
>>  arch/riscv/lib/smp.c                 | 68 ++++++++--------------------
>>  7 files changed, 101 insertions(+), 92 deletions(-)
>>
>> diff --git a/arch/riscv/cpu/cpu.c b/arch/riscv/cpu/cpu.c
>> index e457f6acbf..a971ec8694 100644
>> --- a/arch/riscv/cpu/cpu.c
>> +++ b/arch/riscv/cpu/cpu.c
>> @@ -96,6 +96,15 @@ int arch_cpu_init_dm(void)
>>                      csr_write(CSR_SATP, 0);
>>      }
>>  
>> +#ifdef CONFIG_SMP
>> +    ret = riscv_init_ipi();
>> +    if (ret)
>> +            return ret;
>> +
>> +    /* Atomically set a flag enabling IPI handling */
>> +    WRITE_ONCE(gd->arch.ipi_ready, 1);
>> +#endif
>> +
>>      return 0;
>>  }
>>  
>> diff --git a/arch/riscv/include/asm/global_data.h 
>> b/arch/riscv/include/asm/global_data.h
>> index 7276d9763f..b24f8fd2a7 100644
>> --- a/arch/riscv/include/asm/global_data.h
>> +++ b/arch/riscv/include/asm/global_data.h
>> @@ -28,6 +28,7 @@ struct arch_global_data {
>>  #endif
>>  #ifdef CONFIG_SMP
>>      struct ipi_data ipi[CONFIG_NR_CPUS];
>> +    long ipi_ready; /* Set after riscv_init_ipi is called */
>>  #endif
>>  #ifndef CONFIG_XIP
>>      ulong available_harts;
>> diff --git a/arch/riscv/include/asm/smp.h b/arch/riscv/include/asm/smp.h
>> index 74de92ed13..1b428856b2 100644
>> --- a/arch/riscv/include/asm/smp.h
>> +++ b/arch/riscv/include/asm/smp.h
>> @@ -51,4 +51,47 @@ void handle_ipi(ulong hart);
>>   */
>>  int smp_call_function(ulong addr, ulong arg0, ulong arg1, int wait);
>>  
>> +/**
>> + * riscv_init_ipi() - Initialize inter-process interrupt (IPI) driver
>> + *
>> + * Platform code must provide this function. This function is called once 
>> after
>> + * the cpu driver is initialized. No other riscv_*_ipi() calls will be made
>> + * before this function is called.
>> + *
>> + * @return 0 if OK, -ve on error
>> + */
>> +int riscv_init_ipi(void);
>> +
>> +/**
>> + * riscv_send_ipi() - Send inter-processor interrupt (IPI)
>> + *
>> + * Platform code must provide this function.
>> + *
>> + * @hart: Hart ID of receiving hart
>> + * @return 0 if OK, -ve on error
>> + */
>> +int riscv_send_ipi(int hart);
>> +
>> +/**
>> + * riscv_clear_ipi() - Clear inter-processor interrupt (IPI)
>> + *
>> + * Platform code must provide this function.
>> + *
>> + * @hart: Hart ID of hart to be cleared
>> + * @return 0 if OK, -ve on error
>> + */
>> +int riscv_clear_ipi(int hart);
>> +
>> +/**
>> + * riscv_get_ipi() - Get status of inter-processor interrupt (IPI)
>> + *
>> + * Platform code must provide this function.
>> + *
>> + * @hart: Hart ID of hart to be checked
>> + * @pending: Pointer to variable with result of the check,
>> + *           1 if IPI is pending, 0 otherwise
>> + * @return 0 if OK, -ve on error
>> + */
>> +int riscv_get_ipi(int hart, int *pending);
>> +
>>  #endif
>> diff --git a/arch/riscv/lib/andes_plic.c b/arch/riscv/lib/andes_plic.c
>> index 20529ab3eb..8484f76386 100644
>> --- a/arch/riscv/lib/andes_plic.c
>> +++ b/arch/riscv/lib/andes_plic.c
>> @@ -30,20 +30,6 @@
>>  #define SEND_IPI_TO_HART(hart)  (0x80 >> (hart))
>>  
>>  DECLARE_GLOBAL_DATA_PTR;
>> -static int init_plic(void);
>> -
>> -#define PLIC_BASE_GET(void)                                         \
>> -    do {                                                            \
>> -            long *ret;                                              \
>> -                                                                    \
>> -            if (!gd->arch.plic) {                                   \
>> -                    ret = syscon_get_first_range(RISCV_SYSCON_PLIC); \
>> -                    if (IS_ERR(ret))                                \
>> -                            return PTR_ERR(ret);                    \
>> -                    gd->arch.plic = ret;                            \
>> -                    init_plic();                                    \
>> -            }                                                       \
>> -    } while (0)
>>  
>>  static int enable_ipi(int hart)
>>  {
>> @@ -93,13 +79,21 @@ static int init_plic(void)
>>      return -ENODEV;
>>  }
>>  
>> +int riscv_init_ipi(void)
>> +{
>> +    int ret = syscon_get_first_range(RISCV_SYSCON_PLIC);
>> +
>> +    if (IS_ERR(ret))
>> +            return PTR_ERR(ret);
>> +    gd->arch.plic = ret;
>> +
>> +    return init_plic();
>> +}
>> +
>>  int riscv_send_ipi(int hart)
>>  {
>> -    unsigned int ipi;
>> +    unsigned int ipi = (SEND_IPI_TO_HART(hart) << (8 * gd->arch.boot_hart));
>>  
>> -    PLIC_BASE_GET();
>> -
>> -    ipi = (SEND_IPI_TO_HART(hart) << (8 * gd->arch.boot_hart));
>>      writel(ipi, (void __iomem *)PENDING_REG(gd->arch.plic,
>>                              gd->arch.boot_hart));
>>  
>> @@ -110,8 +104,6 @@ int riscv_clear_ipi(int hart)
>>  {
>>      u32 source_id;
>>  
>> -    PLIC_BASE_GET();
>> -
>>      source_id = readl((void __iomem *)CLAIM_REG(gd->arch.plic, hart));
>>      writel(source_id, (void __iomem *)CLAIM_REG(gd->arch.plic, hart));
>>  
>> @@ -120,8 +112,6 @@ int riscv_clear_ipi(int hart)
>>  
>>  int riscv_get_ipi(int hart, int *pending)
>>  {
>> -    PLIC_BASE_GET();
>> -
>>      *pending = readl((void __iomem *)PENDING_REG(gd->arch.plic,
>>                                                   gd->arch.boot_hart));
>>      *pending = !!(*pending & SEND_IPI_TO_HART(hart));
>> diff --git a/arch/riscv/lib/sbi_ipi.c b/arch/riscv/lib/sbi_ipi.c
>> index 9a698ce74e..310d1bd2a4 100644
>> --- a/arch/riscv/lib/sbi_ipi.c
>> +++ b/arch/riscv/lib/sbi_ipi.c
>> @@ -7,6 +7,11 @@
>>  #include <common.h>
>>  #include <asm/sbi.h>
>>  
>> +int riscv_init_ipi(void)
>> +{
>> +    return 0;
>> +}
>> +
>>  int riscv_send_ipi(int hart)
>>  {
>>      ulong mask;
>> diff --git a/arch/riscv/lib/sifive_clint.c b/arch/riscv/lib/sifive_clint.c
>> index 5e0d25720b..62c1b2b0ef 100644
>> --- a/arch/riscv/lib/sifive_clint.c
>> +++ b/arch/riscv/lib/sifive_clint.c
>> @@ -24,22 +24,8 @@
>>  
>>  DECLARE_GLOBAL_DATA_PTR;
>>  
>> -#define CLINT_BASE_GET(void)                                                
>> \
>> -    do {                                                            \
>> -            long *ret;                                              \
>> -                                                                    \
>> -            if (!gd->arch.clint) {                                  \
>> -                    ret = syscon_get_first_range(RISCV_SYSCON_CLINT); \
>> -                    if (IS_ERR(ret))                                \
>> -                            return PTR_ERR(ret);                    \
>> -                    gd->arch.clint = ret;                           \
>> -            }                                                       \
>> -    } while (0)
>> -
>>  int riscv_get_time(u64 *time)
>>  {
>> -    CLINT_BASE_GET();
>> -
>>      *time = readq((void __iomem *)MTIME_REG(gd->arch.clint));
>>  
>>      return 0;
>> @@ -47,17 +33,24 @@ int riscv_get_time(u64 *time)
>>  
>>  int riscv_set_timecmp(int hart, u64 cmp)
>>  {
>> -    CLINT_BASE_GET();
>> -
>>      writeq(cmp, (void __iomem *)MTIMECMP_REG(gd->arch.clint, hart));
>>  
>>      return 0;
>>  }
>>  
>> +int riscv_init_ipi(void)
>> +{
>> +            long *ret = syscon_get_first_range(RISCV_SYSCON_CLINT);
>> +
>> +            if (IS_ERR(ret))
>> +                    return PTR_ERR(ret);
>> +            gd->arch.clint = ret;
>> +
>> +            return 0;
>> +}
>> +
> 
> Please fix the indentation here.
>


Ok.

>>  int riscv_send_ipi(int hart)
>>  {
>> -    CLINT_BASE_GET();
>> -
>>      writel(1, (void __iomem *)MSIP_REG(gd->arch.clint, hart));
>>  
>>      return 0;
>> @@ -65,8 +58,6 @@ int riscv_send_ipi(int hart)
>>  
>>  int riscv_clear_ipi(int hart)
>>  {
>> -    CLINT_BASE_GET();
>> -
>>      writel(0, (void __iomem *)MSIP_REG(gd->arch.clint, hart));
>>  
>>      return 0;
>> @@ -74,8 +65,6 @@ int riscv_clear_ipi(int hart)
>>  
>>  int riscv_get_ipi(int hart, int *pending)
>>  {
>> -    CLINT_BASE_GET();
>> -
>>      *pending = readl((void __iomem *)MSIP_REG(gd->arch.clint, hart));
>>  
>>      return 0;
>> diff --git a/arch/riscv/lib/smp.c b/arch/riscv/lib/smp.c
>> index 17adb35730..3b1e52e9b2 100644
>> --- a/arch/riscv/lib/smp.c
>> +++ b/arch/riscv/lib/smp.c
>> @@ -12,38 +12,6 @@
>>  
>>  DECLARE_GLOBAL_DATA_PTR;
>>  
>> -/**
>> - * riscv_send_ipi() - Send inter-processor interrupt (IPI)
>> - *
>> - * Platform code must provide this function.
>> - *
>> - * @hart: Hart ID of receiving hart
>> - * @return 0 if OK, -ve on error
>> - */
>> -extern int riscv_send_ipi(int hart);
>> -
>> -/**
>> - * riscv_clear_ipi() - Clear inter-processor interrupt (IPI)
>> - *
>> - * Platform code must provide this function.
>> - *
>> - * @hart: Hart ID of hart to be cleared
>> - * @return 0 if OK, -ve on error
>> - */
>> -extern int riscv_clear_ipi(int hart);
>> -
>> -/**
>> - * riscv_get_ipi() - Get status of inter-processor interrupt (IPI)
>> - *
>> - * Platform code must provide this function.
>> - *
>> - * @hart: Hart ID of hart to be checked
>> - * @pending: Pointer to variable with result of the check,
>> - *           1 if IPI is pending, 0 otherwise
>> - * @return 0 if OK, -ve on error
>> - */
>> -extern int riscv_get_ipi(int hart, int *pending);
>> -
>>  static int send_ipi_many(struct ipi_data *ipi, int wait)
>>  {
>>      ofnode node, cpus;
>> @@ -110,37 +78,41 @@ void handle_ipi(ulong hart)
>>      int ret;
>>      void (*smp_function)(ulong hart, ulong arg0, ulong arg1);
>>  
>> -    if (hart >= CONFIG_NR_CPUS)
>> +    if (hart >= CONFIG_NR_CPUS || !READ_ONCE(gd->arch.ipi_ready))
>>              return;
>>  
>> -    __smp_mb();
>> -
>> -    smp_function = (void (*)(ulong, ulong, ulong))gd->arch.ipi[hart].addr;
>> -    invalidate_icache_all();
>> -
> 
> Don't move this. It is intended to be run before the IPI is cleared.

Hm, ok. I think I moved it to after because of the 'if (!smp_function)'
check, but those two don't really need to be done together.

>>      /*
>>       * Clear the IPI to acknowledge the request before jumping to the
>>       * requested function.
>>       */
>>      ret = riscv_clear_ipi(hart);
>>      if (ret) {
>> -            pr_err("Cannot clear IPI of hart %ld\n", hart);
>> +            pr_err("Cannot clear IPI of hart %ld (error %d)\n", hart, ret);
>>              return;
>>      }
>>  
>> +    __smp_mb();
>> +
>> +    smp_function = (void (*)(ulong, ulong, ulong))gd->arch.ipi[hart].addr;
>> +    /*
>> +     * There may be an IPI raised before u-boot begins execution, so check
>> +     * to ensure we actually have a function to call.
>> +     */
>> +    if (!smp_function)
>> +            return;
>> +    log_debug("hart = %lu func = %p\n", hart, smp_function);
> 
> The log messages might be corrupted if multiple harts are calling the
> log function here. I have not looked into the details so this might not
> be an issue. In that case it is fine to keep, otherwise please remove
> it.

I ran into this problem a lot when debugging. I ended up implementing a
spinlock around puts/putc. I agree it's probably better to remove this,
but I worry that concurrency bugs will become much harder to track down
without some kind of feedback. (This same criticism applies to the log
message above as well).

> 
> Thanks,
> Lukas
> 
>> +    invalidate_icache_all();
>> +
>>      smp_function(hart, gd->arch.ipi[hart].arg0, gd->arch.ipi[hart].arg1);
>>  }
>>  
>>  int smp_call_function(ulong addr, ulong arg0, ulong arg1, int wait)
>>  {
>> -    int ret = 0;
>> -    struct ipi_data ipi;
>> +    struct ipi_data ipi = {
>> +            .addr = addr,
>> +            .arg0 = arg0,
>> +            .arg1 = arg1,
>> +    };
>>  
>> -    ipi.addr = addr;
>> -    ipi.arg0 = arg0;
>> -    ipi.arg1 = arg1;
>> -
>> -    ret = send_ipi_many(&ipi, wait);
>> -
>> -    return ret;
>> +    return send_ipi_many(&ipi, wait);
>>  }

Re: [PATCH v5 27/33] riscv: Fix race conditions when initializing IPI

Reply via email to