On Tuesday 25 March 2008, Carl Love wrote: > This patch fixes a bug in the code that records the SPU data and > context switches. The buffer_mutex lock must be held when the > kernel is adding data to the buffer between the kernel and the > OProfile daemon. The lock is not being held in the current code > base. This patch fixes the bug using work queues. The data to > be passed to the daemon is caputured by the interrupt handler. > The workqueue function is invoked to grab the buffer_mutex lock > and add the data to the buffer.
So what was the exact bug you're fixing with this? There was no buffer_mutex before, so why do you need it now? Can't this be a spinlock so you can get it from interrupt context instead of using a workqueue? > Index: linux-2.6.25-rc4/arch/powerpc/oprofile/cell/spu_profiler.c > =================================================================== > --- linux-2.6.25-rc4.orig/arch/powerpc/oprofile/cell/spu_profiler.c > +++ linux-2.6.25-rc4/arch/powerpc/oprofile/cell/spu_profiler.c > @@ -16,6 +16,7 @@ > #include <linux/smp.h> > #include <linux/slab.h> > #include <asm/cell-pmu.h> > +#include <linux/workqueue.h> > #include "pr_util.h" > Please keep #include statements in alphabetical order, with all linux/ files before the asm/ files. > #define TRACE_ARRAY_SIZE 1024 > @@ -32,9 +33,19 @@ static unsigned int profiling_interval; > > #define SPU_PC_MASK 0xFFFF > > +/* The generic OProfile code uses the buffer_mutex to protect the buffer > + * between the kernel and the daemon. The SPU code needs to use the buffer > + * to ensure that the kernel SPU writes complete as a single block before > + * being consumed by the daemon. > + */ > +extern struct mutex buffer_mutex; > + > static DEFINE_SPINLOCK(sample_array_lock); > unsigned long sample_array_lock_flags; > > +struct work_struct spu_record_wq; > +extern struct workqueue_struct *oprofile_spu_wq; > + > void set_spu_profiling_frequency(unsigned int freq_khz, unsigned int > cycles_reset) > { > unsigned long ns_per_cyc; Never put extern statements in the implementation, they describe the interface between two parts of the code and should be inside of a common header. Why do you want to have your own workqueue instead of using the global one? > @@ -123,14 +134,14 @@ static int cell_spu_pc_collection(int cp > return entry; > } > > - > -static enum hrtimer_restart profile_spus(struct hrtimer *timer) > -{ > - ktime_t kt; > +static void profile_spus_record_samples (struct work_struct *ws) { > + /* This routine is called via schedule_work() to record the > + * spu data. It must be run in a normal kernel mode to > + * grab the OProfile mutex lock. > + */ > int cpu, node, k, num_samples, spu_num; > > - if (!spu_prof_running) > - goto stop; > + mutex_lock(&buffer_mutex); > > for_each_online_cpu(cpu) { > if (cbe_get_hw_thread_id(cpu)) > @@ -170,6 +181,20 @@ static enum hrtimer_restart profile_spus > smp_wmb(); /* insure spu event buffer updates are written */ > /* don't want events intermingled... */ > > + mutex_unlock(&buffer_mutex); > +} > + > +static enum hrtimer_restart profile_spus(struct hrtimer *timer) > +{ > + ktime_t kt; > + > + > + if (!spu_prof_running) > + goto stop; > + > + /* schedule the funtion to record the data */ > + schedule_work(&spu_record_wq); > + > kt = ktime_set(0, profiling_interval); > if (!spu_prof_running) > goto stop; This looks like you want to use a delayed_work rather than building your own out of hrtimer and work. Is there any point why you want to use an hrtimer? > -static DEFINE_SPINLOCK(buffer_lock); > +extern struct mutex buffer_mutex; > +extern struct workqueue_struct *oprofile_spu_wq; > +extern int calls_to_record_switch; > + Again, public interfaces need to go to a header file, and should have a name that identifies the interface. "buffer_mutex" is certainly not a suitable name for a kernel-wide global variable! > static DEFINE_SPINLOCK(cache_lock); > static int num_spu_nodes; > + > int spu_prof_num_nodes; > int last_guard_val[MAX_NUMNODES * 8]; > +int cnt_swtch_processed_flag[MAX_NUMNODES * 8]; > + > +struct spus_profiling_code_data_s { > + int num_spu_nodes; > + struct work_struct spu_prof_code_wq; > +} spus_profiling_code_data; > + > +struct spu_context_switch_data_s { > + struct spu *spu; > + unsigned long spu_cookie; > + unsigned long app_dcookie; > + unsigned int offset; > + unsigned long objectId; > + int valid_entry; > +} spu_context_switch_data; I don't understand what these variables are really doing, but having e.g. just one spu_context_switch_data for all the SPUs doesn't seem to make much sense. What happens when two SPUs do a context switch at the same time? > +int calls_to_record_switch = 0; > +int record_spu_start_flag = 0; > + > +struct spus_cntxt_sw_data_s { > + int num_spu_nodes; > + struct spu_context_switch_data_s spu_data[MAX_NUMNODES * 8]; > + struct work_struct spu_cntxt_work; > +} spus_cntxt_sw_data; Something is very wrong if you need so many global variables! > /* Container for caching information about an active SPU task. */ > struct cached_info { > @@ -44,6 +73,8 @@ struct cached_info { > struct kref cache_ref; > }; > > +struct workqueue_struct *oprofile_spu_wq; > + > static struct cached_info *spu_info[MAX_NUMNODES * 8]; While you're cleaning this up, I guess the cached_info should be moved into a pointer from struct spu as well, instead of having this global variable here. > @@ -375,16 +457,30 @@ int spu_sync_start(void) > int k; > int ret = SKIP_GENERIC_SYNC; > int register_ret; > - unsigned long flags = 0; > > spu_prof_num_nodes = number_of_online_nodes(); > num_spu_nodes = spu_prof_num_nodes * 8; > > - spin_lock_irqsave(&buffer_lock, flags); > - add_event_entry(ESCAPE_CODE); > - add_event_entry(SPU_PROFILING_CODE); > - add_event_entry(num_spu_nodes); > - spin_unlock_irqrestore(&buffer_lock, flags); > + /* create private work queue, execution of work is time critical */ > + oprofile_spu_wq = create_workqueue("spu_oprofile"); > + > + /* due to a race when the spu is already running stuff, need to > + * set a flag to tell the spu context switch to record the start > + * before recording the context switches. > + */ > + record_spu_start_flag = 1; > + > + spus_profiling_code_data.num_spu_nodes = num_spu_nodes; > + > + /* setup work queue functiion for recording context switch info */ > + spus_cntxt_sw_data.num_spu_nodes = num_spu_nodes; > + for (k = 0; k<(MAX_NUMNODES * 8); k++) { > + spus_cntxt_sw_data.spu_data[k].valid_entry = 0; > + cnt_swtch_processed_flag[k] = 0; > + } > + > + INIT_WORK(&spus_cntxt_sw_data.spu_cntxt_work, > + record_spu_process_switch); I would guess that you need one work struct per SPU instead of a global one, if you want to pass the SPU pointer as an argument. Arnd <>< _______________________________________________ Linuxppc-dev mailing list Linuxppc-dev@ozlabs.org https://ozlabs.org/mailman/listinfo/linuxppc-dev