Re: [PATCH 6/6] smp: Cleanup smp_call_function*()

2020-06-18 Thread Peter Zijlstra
On Wed, Jun 17, 2020 at 11:51:07PM -0700, Christoph Hellwig wrote:
> Much better.  Although if we touch all the callers we might as well
> pass the csd as the argument to the callback, as with that we can
> pretty trivially remove the private data field later.

My plan was to introduce a new function and type and convert
smp_call_function_async() callers over to that. The csd as it exists is
useful for the regular smp_call_function*() API.

> Btw, it seems the callers that don't have the CSD embedded into the
> containing structure seems to be of these two kinds:
> 
>  - reimplementing on_each_cpumask (mostly because they can be called
>from irq context)

These are fairly special purpose constructs; and they come at the cost
of extra per-cpu storage and they have the limitation that they must
wait for completion of the first before they can be used again.

>  - reimplementing smp_call_function_single because they want
>to sleep instead of busy wait

These are atrocious pieces of crap (the x86/msr ones), the reason it was
done is because virt :/


Re: [PATCH 6/6] smp: Cleanup smp_call_function*()

2020-06-18 Thread Christoph Hellwig
On Wed, Jun 17, 2020 at 01:04:01PM +0200, Peter Zijlstra wrote:
> On Wed, Jun 17, 2020 at 01:23:49AM -0700, Christoph Hellwig wrote:
> 
> > > @@ -178,9 +178,7 @@ static void zpci_handle_fallback_irq(voi
> > >   if (atomic_inc_return(_data->scheduled) > 1)
> > >   continue;
> > >  
> > > - cpu_data->csd.func = zpci_handle_remote_irq;
> > > - cpu_data->csd.info = _data->scheduled;
> > > - cpu_data->csd.flags = 0;
> > > + cpu_data->csd = CSD_INIT(zpci_handle_remote_irq, 
> > > _data->scheduled);
> > 
> > This looks weird.  I'd much rather see an initialization ala INIT_WORK:
> > 
> > INIT_CSD(_data->csd, zpci_handle_remote_irq,
> >  _data->scheduled);
> 
> 
> like so then?

Much better.  Although if we touch all the callers we might as well
pass the csd as the argument to the callback, as with that we can
pretty trivially remove the private data field later.

Btw, it seems the callers that don't have the CSD embedded into the
containing structure seems to be of these two kinds:

 - reimplementing on_each_cpumask (mostly because they can be called
   from irq context)
 - reimplementing smp_call_function_single because they want
   to sleep instead of busy wait

I wonder if those would be useful primitives for smp.c..


Re: [PATCH 6/6] smp: Cleanup smp_call_function*()

2020-06-17 Thread Peter Zijlstra
On Wed, Jun 17, 2020 at 01:23:49AM -0700, Christoph Hellwig wrote:

> > @@ -178,9 +178,7 @@ static void zpci_handle_fallback_irq(voi
> > if (atomic_inc_return(_data->scheduled) > 1)
> > continue;
> >  
> > -   cpu_data->csd.func = zpci_handle_remote_irq;
> > -   cpu_data->csd.info = _data->scheduled;
> > -   cpu_data->csd.flags = 0;
> > +   cpu_data->csd = CSD_INIT(zpci_handle_remote_irq, 
> > _data->scheduled);
> 
> This looks weird.  I'd much rather see an initialization ala INIT_WORK:
> 
>   INIT_CSD(_data->csd, zpci_handle_remote_irq,
>_data->scheduled);


like so then?

---
--- a/arch/mips/kernel/process.c
+++ b/arch/mips/kernel/process.c
@@ -687,7 +687,6 @@ unsigned long arch_align_stack(unsigned
return sp & ALMASK;
 }
 
-static DEFINE_PER_CPU(call_single_data_t, backtrace_csd);
 static struct cpumask backtrace_csd_busy;
 
 static void handle_backtrace(void *info)
@@ -696,6 +695,9 @@ static void handle_backtrace(void *info)
cpumask_clear_cpu(smp_processor_id(), _csd_busy);
 }
 
+static DEFINE_PER_CPU(call_single_data_t, backtrace_csd) =
+   CSD_INIT(handle_backtrace, NULL);
+
 static void raise_backtrace(cpumask_t *mask)
 {
call_single_data_t *csd;
@@ -715,7 +717,6 @@ static void raise_backtrace(cpumask_t *m
}
 
csd = _cpu(backtrace_csd, cpu);
-   csd->func = handle_backtrace;
smp_call_function_single_async(cpu, csd);
}
 }
--- a/arch/mips/kernel/smp.c
+++ b/arch/mips/kernel/smp.c
@@ -687,36 +687,23 @@ EXPORT_SYMBOL(flush_tlb_one);
 
 #ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST
 
-static DEFINE_PER_CPU(call_single_data_t, tick_broadcast_csd);
-
-void tick_broadcast(const struct cpumask *mask)
-{
-   call_single_data_t *csd;
-   int cpu;
-
-   for_each_cpu(cpu, mask) {
-   csd = _cpu(tick_broadcast_csd, cpu);
-   smp_call_function_single_async(cpu, csd);
-   }
-}
-
 static void tick_broadcast_callee(void *info)
 {
tick_receive_broadcast();
 }
 
-static int __init tick_broadcast_init(void)
+static DEFINE_PER_CPU(call_single_data_t, tick_broadcast_csd) =
+   CSD_INIT(tick_broadcast_callee, NULL);
+
+void tick_broadcast(const struct cpumask *mask)
 {
call_single_data_t *csd;
int cpu;
 
-   for (cpu = 0; cpu < NR_CPUS; cpu++) {
+   for_each_cpu(cpu, mask) {
csd = _cpu(tick_broadcast_csd, cpu);
-   csd->func = tick_broadcast_callee;
+   smp_call_function_single_async(cpu, csd);
}
-
-   return 0;
 }
-early_initcall(tick_broadcast_init);
 
 #endif /* CONFIG_GENERIC_CLOCKEVENTS_BROADCAST */
--- a/arch/s390/pci/pci_irq.c
+++ b/arch/s390/pci/pci_irq.c
@@ -178,9 +178,7 @@ static void zpci_handle_fallback_irq(voi
if (atomic_inc_return(_data->scheduled) > 1)
continue;
 
-   cpu_data->csd.func = zpci_handle_remote_irq;
-   cpu_data->csd.info = _data->scheduled;
-   cpu_data->csd.flags = 0;
+   INIT_CSD(_data->csd, zpci_handle_remote_irq, 
_data->scheduled);
smp_call_function_single_async(cpu, _data->csd);
}
 }
--- a/arch/x86/kernel/cpuid.c
+++ b/arch/x86/kernel/cpuid.c
@@ -74,10 +74,9 @@ static ssize_t cpuid_read(struct file *f
 
init_completion();
for (; count; count -= 16) {
-   call_single_data_t csd = {
-   .func = cpuid_smp_cpuid,
-   .info = ,
-   };
+   call_single_data_t csd;
+
+   INIT_CSD(, cpuid_smp_cpuid, );
 
cmd.regs.eax = pos;
cmd.regs.ecx = pos >> 32;
--- a/arch/x86/lib/msr-smp.c
+++ b/arch/x86/lib/msr-smp.c
@@ -169,12 +169,11 @@ static void __wrmsr_safe_on_cpu(void *in
 int rdmsr_safe_on_cpu(unsigned int cpu, u32 msr_no, u32 *l, u32 *h)
 {
struct msr_info_completion rv;
-   call_single_data_t csd = {
-   .func   = __rdmsr_safe_on_cpu,
-   .info   = ,
-   };
+   call_single_data_t csd;
int err;
 
+   INIT_CSD(, __rdmsr_safe_on_cpu, );
+
memset(, 0, sizeof(rv));
init_completion();
rv.msr.msr_no = msr_no;
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -629,9 +629,7 @@ void blk_mq_force_complete_rq(struct req
shared = cpus_share_cache(cpu, ctx->cpu);
 
if (cpu != ctx->cpu && !shared && cpu_online(ctx->cpu)) {
-   rq->csd.func = __blk_mq_complete_request_remote;
-   rq->csd.info = rq;
-   rq->csd.flags = 0;
+   INIT_CSD(>csd, __blk_mq_complete_request_remote, rq);
smp_call_function_single_async(ctx->cpu, >csd);
} else {
q->mq_ops->complete(rq);
--- a/block/blk-softirq.c
+++ b/block/blk-softirq.c
@@ -57,13 +57,8 @@ static void trigger_softirq(void *data)
 

Re: [PATCH 6/6] smp: Cleanup smp_call_function*()

2020-06-17 Thread Peter Zijlstra
On Wed, Jun 17, 2020 at 01:23:49AM -0700, Christoph Hellwig wrote:
> > -static DEFINE_PER_CPU(call_single_data_t, backtrace_csd);
> > +static DEFINE_PER_CPU(call_single_data_t, backtrace_csd) = 
> > CSD_INIT(handle_backtrace, NULL);
> >  static struct cpumask backtrace_csd_busy;
> 
> Besides the crazy long line: does assigning to a DEFINE_PER_CPU
> really work and initialize all the members?

Yes. The way it works is that it initializes the variable that ends up
in the .data..percpu section and that's copied when we create the
actual per-cpu things.

> > @@ -178,9 +178,7 @@ static void zpci_handle_fallback_irq(voi
> > if (atomic_inc_return(_data->scheduled) > 1)
> > continue;
> >  
> > -   cpu_data->csd.func = zpci_handle_remote_irq;
> > -   cpu_data->csd.info = _data->scheduled;
> > -   cpu_data->csd.flags = 0;
> > +   cpu_data->csd = CSD_INIT(zpci_handle_remote_irq, 
> > _data->scheduled);
> 
> This looks weird.  I'd much rather see an initialization ala INIT_WORK:
> 
>   INIT_CSD(_data->csd, zpci_handle_remote_irq,
>_data->scheduled);
> 
> Also for many smp_call_function_* users it would be trivial and actually
> lead to nicer code if the data argument went away and we'd just use
> container_of to get to the containing structure.  For the remaining
> ones we can trivially generate a container structure that has the
> extra data pointer.

Agreed, except that won't work for things like cfd_data, csd_data and
csd_stack in smp.c. It might be possible to rework some of that, but
that's going to be further surgery.

> > --- a/block/blk-mq.c
> > +++ b/block/blk-mq.c
> > @@ -629,9 +629,7 @@ void blk_mq_force_complete_rq(struct req
> > shared = cpus_share_cache(cpu, ctx->cpu);
> >  
> > if (cpu != ctx->cpu && !shared && cpu_online(ctx->cpu)) {
> > -   rq->csd.func = __blk_mq_complete_request_remote;
> > -   rq->csd.info = rq;
> > -   rq->csd.flags = 0;
> > +   rq->csd = CSD_INIT(__blk_mq_complete_request_remote, rq);
> > smp_call_function_single_async(ctx->cpu, >csd);
> > } else {
> > q->mq_ops->complete(rq);
> > --- a/block/blk-softirq.c
> > +++ b/block/blk-softirq.c
> > @@ -57,13 +57,8 @@ static void trigger_softirq(void *data)
> >  static int raise_blk_irq(int cpu, struct request *rq)
> >  {
> > if (cpu_online(cpu)) {
> > -   call_single_data_t *data = >csd;
> > -
> > -   data->func = trigger_softirq;
> > -   data->info = rq;
> > -   data->flags = 0;
> > -
> > -   smp_call_function_single_async(cpu, data);
> > +   rq->csd = CSD_INIT(trigger_softirq, rq);
> > +   smp_call_function_single_async(cpu, >csd);
> > return 0;
> > }
> 
> FYI, I rewrote much of the blk code in this series:
> 
> https://lore.kernel.org/linux-block/20200611064452.12353-1-...@lst.de/T/#t
> 
> that you also were Cced on.

Yes, I know. The merge shouldn't be too difficult, but if that's landed
in a git tree meanwhile, I can try and pull that in.

> >  struct __call_single_data {
> > -   union {
> > -   struct __call_single_node node;
> > -   struct {
> > -   struct llist_node llist;
> > -   unsigned int flags;
> > -   };
> > -   };
> > +   struct __call_single_node node;
> > smp_call_func_t func;
> > void *info;
> >  };
> 
> Can we rename this to struct call_single_data without the __prefix
> and switch all the users you touch anyway away from the typedef?

That mess exists because of the alignment thing. IIRC you can't use the
sizeof() of a struct you're still declaring.


Re: [PATCH 6/6] smp: Cleanup smp_call_function*()

2020-06-17 Thread Christoph Hellwig
> -static DEFINE_PER_CPU(call_single_data_t, backtrace_csd);
> +static DEFINE_PER_CPU(call_single_data_t, backtrace_csd) = 
> CSD_INIT(handle_backtrace, NULL);
>  static struct cpumask backtrace_csd_busy;

Besides the crazy long line: does assigning to a DEFINE_PER_CPU
really work and initialize all the members?

> @@ -178,9 +178,7 @@ static void zpci_handle_fallback_irq(voi
>   if (atomic_inc_return(_data->scheduled) > 1)
>   continue;
>  
> - cpu_data->csd.func = zpci_handle_remote_irq;
> - cpu_data->csd.info = _data->scheduled;
> - cpu_data->csd.flags = 0;
> + cpu_data->csd = CSD_INIT(zpci_handle_remote_irq, 
> _data->scheduled);

This looks weird.  I'd much rather see an initialization ala INIT_WORK:

INIT_CSD(_data->csd, zpci_handle_remote_irq,
 _data->scheduled);

Also for many smp_call_function_* users it would be trivial and actually
lead to nicer code if the data argument went away and we'd just use
container_of to get to the containing structure.  For the remaining
ones we can trivially generate a container structure that has the
extra data pointer.

> --- a/block/blk-mq.c
> +++ b/block/blk-mq.c
> @@ -629,9 +629,7 @@ void blk_mq_force_complete_rq(struct req
>   shared = cpus_share_cache(cpu, ctx->cpu);
>  
>   if (cpu != ctx->cpu && !shared && cpu_online(ctx->cpu)) {
> - rq->csd.func = __blk_mq_complete_request_remote;
> - rq->csd.info = rq;
> - rq->csd.flags = 0;
> + rq->csd = CSD_INIT(__blk_mq_complete_request_remote, rq);
>   smp_call_function_single_async(ctx->cpu, >csd);
>   } else {
>   q->mq_ops->complete(rq);
> --- a/block/blk-softirq.c
> +++ b/block/blk-softirq.c
> @@ -57,13 +57,8 @@ static void trigger_softirq(void *data)
>  static int raise_blk_irq(int cpu, struct request *rq)
>  {
>   if (cpu_online(cpu)) {
> - call_single_data_t *data = >csd;
> -
> - data->func = trigger_softirq;
> - data->info = rq;
> - data->flags = 0;
> -
> - smp_call_function_single_async(cpu, data);
> + rq->csd = CSD_INIT(trigger_softirq, rq);
> + smp_call_function_single_async(cpu, >csd);
>   return 0;
>   }

FYI, I rewrote much of the blk code in this series:

https://lore.kernel.org/linux-block/20200611064452.12353-1-...@lst.de/T/#t

that you also were Cced on.

>  struct __call_single_data {
> - union {
> - struct __call_single_node node;
> - struct {
> - struct llist_node llist;
> - unsigned int flags;
> - };
> - };
> + struct __call_single_node node;
>   smp_call_func_t func;
>   void *info;
>  };

Can we rename this to struct call_single_data without the __prefix
and switch all the users you touch anyway away from the typedef?


Re: [PATCH 6/6] smp: Cleanup smp_call_function*()

2020-06-15 Thread Daniel Thompson
On Mon, Jun 15, 2020 at 02:57:00PM +0200, Peter Zijlstra wrote:
> Get rid of the __call_single_node union and cleanup the API a little
> to avoid external code relying on the structure layout as much.
> 
> Signed-off-by: Peter Zijlstra (Intel) 

For kgdb,
Acked-by: Daniel Thompson 


Daniel.


> ---
>  arch/mips/kernel/process.c  |3 -
>  arch/mips/kernel/smp.c  |   24 +++---
>  arch/s390/pci/pci_irq.c |4 --
>  arch/x86/kernel/cpuid.c |5 ---
>  arch/x86/lib/msr-smp.c  |5 ---
>  block/blk-mq.c  |4 --
>  block/blk-softirq.c |9 +
>  drivers/cpuidle/coupled.c   |3 -
>  drivers/net/ethernet/cavium/liquidio/lio_core.c |9 +
>  include/linux/smp.h |   11 ++
>  kernel/debug/debug_core.c   |5 +--
>  kernel/sched/core.c |   12 +--
>  kernel/smp.c|   40 
> 
>  net/core/dev.c  |3 -
>  14 files changed, 44 insertions(+), 93 deletions(-)
> 
> --- a/arch/mips/kernel/process.c
> +++ b/arch/mips/kernel/process.c
> @@ -686,7 +686,7 @@ unsigned long arch_align_stack(unsigned
>   return sp & ALMASK;
>  }
>  
> -static DEFINE_PER_CPU(call_single_data_t, backtrace_csd);
> +static DEFINE_PER_CPU(call_single_data_t, backtrace_csd) = 
> CSD_INIT(handle_backtrace, NULL);
>  static struct cpumask backtrace_csd_busy;
>  
>  static void handle_backtrace(void *info)
> @@ -714,7 +714,6 @@ static void raise_backtrace(cpumask_t *m
>   }
>  
>   csd = _cpu(backtrace_csd, cpu);
> - csd->func = handle_backtrace;
>   smp_call_function_single_async(cpu, csd);
>   }
>  }
> --- a/arch/mips/kernel/smp.c
> +++ b/arch/mips/kernel/smp.c
> @@ -687,36 +687,22 @@ EXPORT_SYMBOL(flush_tlb_one);
>  
>  #ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST
>  
> -static DEFINE_PER_CPU(call_single_data_t, tick_broadcast_csd);
> -
> -void tick_broadcast(const struct cpumask *mask)
> -{
> - call_single_data_t *csd;
> - int cpu;
> -
> - for_each_cpu(cpu, mask) {
> - csd = _cpu(tick_broadcast_csd, cpu);
> - smp_call_function_single_async(cpu, csd);
> - }
> -}
> -
>  static void tick_broadcast_callee(void *info)
>  {
>   tick_receive_broadcast();
>  }
>  
> -static int __init tick_broadcast_init(void)
> +static DEFINE_PER_CPU(call_single_data_t, tick_broadcast_csd) = 
> CSD_INIT(tick_broadcast_callee, NULL);
> +
> +void tick_broadcast(const struct cpumask *mask)
>  {
>   call_single_data_t *csd;
>   int cpu;
>  
> - for (cpu = 0; cpu < NR_CPUS; cpu++) {
> + for_each_cpu(cpu, mask) {
>   csd = _cpu(tick_broadcast_csd, cpu);
> - csd->func = tick_broadcast_callee;
> + smp_call_function_single_async(cpu, csd);
>   }
> -
> - return 0;
>  }
> -early_initcall(tick_broadcast_init);
>  
>  #endif /* CONFIG_GENERIC_CLOCKEVENTS_BROADCAST */
> --- a/arch/s390/pci/pci_irq.c
> +++ b/arch/s390/pci/pci_irq.c
> @@ -178,9 +178,7 @@ static void zpci_handle_fallback_irq(voi
>   if (atomic_inc_return(_data->scheduled) > 1)
>   continue;
>  
> - cpu_data->csd.func = zpci_handle_remote_irq;
> - cpu_data->csd.info = _data->scheduled;
> - cpu_data->csd.flags = 0;
> + cpu_data->csd = CSD_INIT(zpci_handle_remote_irq, 
> _data->scheduled);
>   smp_call_function_single_async(cpu, _data->csd);
>   }
>  }
> --- a/arch/x86/kernel/cpuid.c
> +++ b/arch/x86/kernel/cpuid.c
> @@ -74,10 +74,7 @@ static ssize_t cpuid_read(struct file *f
>  
>   init_completion();
>   for (; count; count -= 16) {
> - call_single_data_t csd = {
> - .func = cpuid_smp_cpuid,
> - .info = ,
> - };
> + call_single_data_t csd = CSD_INIT(cpuid_smp_cpuid, );
>  
>   cmd.regs.eax = pos;
>   cmd.regs.ecx = pos >> 32;
> --- a/arch/x86/lib/msr-smp.c
> +++ b/arch/x86/lib/msr-smp.c
> @@ -169,10 +169,7 @@ static void __wrmsr_safe_on_cpu(void *in
>  int rdmsr_safe_on_cpu(unsigned int cpu, u32 msr_no, u32 *l, u32 *h)
>  {
>   struct msr_info_completion rv;
> - call_single_data_t csd = {
> - .func   = __rdmsr_safe_on_cpu,
> - .info   = ,
> - };
> + call_single_data_t csd = CSD_INIT(__rdmsr_safe_on_cpu, );
>   int err;
>  
>   memset(, 0, sizeof(rv));
> --- a/block/blk-mq.c
> +++ b/block/blk-mq.c
> @@ -629,9 +629,7 @@ void blk_mq_force_complete_rq(struct req
>   shared = cpus_share_cache(cpu, ctx->cpu);
>  
>   if (cpu != ctx->cpu && !shared && cpu_online(ctx->cpu)) {
> - rq->csd.func = 

Re: [PATCH 6/6] smp: Cleanup smp_call_function*()

2020-06-15 Thread Jens Axboe
On 6/15/20 6:57 AM, Peter Zijlstra wrote:
> Get rid of the __call_single_node union and cleanup the API a little
> to avoid external code relying on the structure layout as much.

core and block bits look good to me.

-- 
Jens Axboe



[PATCH 6/6] smp: Cleanup smp_call_function*()

2020-06-15 Thread Peter Zijlstra
Get rid of the __call_single_node union and cleanup the API a little
to avoid external code relying on the structure layout as much.

Signed-off-by: Peter Zijlstra (Intel) 
---
 arch/mips/kernel/process.c  |3 -
 arch/mips/kernel/smp.c  |   24 +++---
 arch/s390/pci/pci_irq.c |4 --
 arch/x86/kernel/cpuid.c |5 ---
 arch/x86/lib/msr-smp.c  |5 ---
 block/blk-mq.c  |4 --
 block/blk-softirq.c |9 +
 drivers/cpuidle/coupled.c   |3 -
 drivers/net/ethernet/cavium/liquidio/lio_core.c |9 +
 include/linux/smp.h |   11 ++
 kernel/debug/debug_core.c   |5 +--
 kernel/sched/core.c |   12 +--
 kernel/smp.c|   40 
 net/core/dev.c  |3 -
 14 files changed, 44 insertions(+), 93 deletions(-)

--- a/arch/mips/kernel/process.c
+++ b/arch/mips/kernel/process.c
@@ -686,7 +686,7 @@ unsigned long arch_align_stack(unsigned
return sp & ALMASK;
 }
 
-static DEFINE_PER_CPU(call_single_data_t, backtrace_csd);
+static DEFINE_PER_CPU(call_single_data_t, backtrace_csd) = 
CSD_INIT(handle_backtrace, NULL);
 static struct cpumask backtrace_csd_busy;
 
 static void handle_backtrace(void *info)
@@ -714,7 +714,6 @@ static void raise_backtrace(cpumask_t *m
}
 
csd = _cpu(backtrace_csd, cpu);
-   csd->func = handle_backtrace;
smp_call_function_single_async(cpu, csd);
}
 }
--- a/arch/mips/kernel/smp.c
+++ b/arch/mips/kernel/smp.c
@@ -687,36 +687,22 @@ EXPORT_SYMBOL(flush_tlb_one);
 
 #ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST
 
-static DEFINE_PER_CPU(call_single_data_t, tick_broadcast_csd);
-
-void tick_broadcast(const struct cpumask *mask)
-{
-   call_single_data_t *csd;
-   int cpu;
-
-   for_each_cpu(cpu, mask) {
-   csd = _cpu(tick_broadcast_csd, cpu);
-   smp_call_function_single_async(cpu, csd);
-   }
-}
-
 static void tick_broadcast_callee(void *info)
 {
tick_receive_broadcast();
 }
 
-static int __init tick_broadcast_init(void)
+static DEFINE_PER_CPU(call_single_data_t, tick_broadcast_csd) = 
CSD_INIT(tick_broadcast_callee, NULL);
+
+void tick_broadcast(const struct cpumask *mask)
 {
call_single_data_t *csd;
int cpu;
 
-   for (cpu = 0; cpu < NR_CPUS; cpu++) {
+   for_each_cpu(cpu, mask) {
csd = _cpu(tick_broadcast_csd, cpu);
-   csd->func = tick_broadcast_callee;
+   smp_call_function_single_async(cpu, csd);
}
-
-   return 0;
 }
-early_initcall(tick_broadcast_init);
 
 #endif /* CONFIG_GENERIC_CLOCKEVENTS_BROADCAST */
--- a/arch/s390/pci/pci_irq.c
+++ b/arch/s390/pci/pci_irq.c
@@ -178,9 +178,7 @@ static void zpci_handle_fallback_irq(voi
if (atomic_inc_return(_data->scheduled) > 1)
continue;
 
-   cpu_data->csd.func = zpci_handle_remote_irq;
-   cpu_data->csd.info = _data->scheduled;
-   cpu_data->csd.flags = 0;
+   cpu_data->csd = CSD_INIT(zpci_handle_remote_irq, 
_data->scheduled);
smp_call_function_single_async(cpu, _data->csd);
}
 }
--- a/arch/x86/kernel/cpuid.c
+++ b/arch/x86/kernel/cpuid.c
@@ -74,10 +74,7 @@ static ssize_t cpuid_read(struct file *f
 
init_completion();
for (; count; count -= 16) {
-   call_single_data_t csd = {
-   .func = cpuid_smp_cpuid,
-   .info = ,
-   };
+   call_single_data_t csd = CSD_INIT(cpuid_smp_cpuid, );
 
cmd.regs.eax = pos;
cmd.regs.ecx = pos >> 32;
--- a/arch/x86/lib/msr-smp.c
+++ b/arch/x86/lib/msr-smp.c
@@ -169,10 +169,7 @@ static void __wrmsr_safe_on_cpu(void *in
 int rdmsr_safe_on_cpu(unsigned int cpu, u32 msr_no, u32 *l, u32 *h)
 {
struct msr_info_completion rv;
-   call_single_data_t csd = {
-   .func   = __rdmsr_safe_on_cpu,
-   .info   = ,
-   };
+   call_single_data_t csd = CSD_INIT(__rdmsr_safe_on_cpu, );
int err;
 
memset(, 0, sizeof(rv));
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -629,9 +629,7 @@ void blk_mq_force_complete_rq(struct req
shared = cpus_share_cache(cpu, ctx->cpu);
 
if (cpu != ctx->cpu && !shared && cpu_online(ctx->cpu)) {
-   rq->csd.func = __blk_mq_complete_request_remote;
-   rq->csd.info = rq;
-   rq->csd.flags = 0;
+   rq->csd = CSD_INIT(__blk_mq_complete_request_remote, rq);
smp_call_function_single_async(ctx->cpu, >csd);
} else {