[PATCH, RESEND] powerpc/cell/oprofile: fix mutex locking for spu-oprofile
From aa5810fa545515c9f383e3e649bd120bef9c7f29 Mon Sep 17 00:00:00 2001 From: Carl Love [EMAIL PROTECTED] Date: Fri, 8 Aug 2008 15:38:36 -0700 Subject: [PATCH] powerpc/cell/oprofile: fix mutex locking for spu-oprofile The issue is the SPU code is not holding the kernel mutex lock while adding samples to the kernel buffer. This patch creates per SPU buffers to hold the data. Data is added to the buffers from in interrupt context. The data is periodically pushed to the kernel buffer via a new Oprofile function oprofile_put_buff(). The oprofile_put_buff() function is called via a work queue enabling the funtion to acquire the mutex lock. The existing user controls for adjusting the per CPU buffer size is used to control the size of the per SPU buffers. Similarly, overflows of the SPU buffers are reported by incrementing the per CPU buffer stats. This eliminates the need to have architecture specific controls for the per SPU buffers which is not acceptable to the OProfile user tool maintainer. The export of the oprofile add_event_entry() is removed as it is no longer needed given this patch. Note, this patch has not addressed the issue of indexing arrays by the spu number. This still needs to be fixed as the spu numbering is not guarenteed to be 0 to max_num_spus-1. Signed-off-by: Carl Love [EMAIL PROTECTED] Signed-off-by: Maynard Johnson [EMAIL PROTECTED] Signed-off-by: Arnd Bergmann [EMAIL PROTECTED] Acked-by: Acked-by: Robert Richter [EMAIL PROTECTED] --- arch/powerpc/oprofile/cell/pr_util.h | 13 ++ arch/powerpc/oprofile/cell/spu_profiler.c |4 +- arch/powerpc/oprofile/cell/spu_task_sync.c | 236 +--- drivers/oprofile/buffer_sync.c | 24 +++ drivers/oprofile/cpu_buffer.c | 15 ++- drivers/oprofile/event_buffer.h|7 + include/linux/oprofile.h | 16 +- 7 files changed, 279 insertions(+), 36 deletions(-) diff --git a/arch/powerpc/oprofile/cell/pr_util.h b/arch/powerpc/oprofile/cell/pr_util.h index 22e4e8d..628009c 100644 --- a/arch/powerpc/oprofile/cell/pr_util.h +++ b/arch/powerpc/oprofile/cell/pr_util.h @@ -24,6 +24,11 @@ #define SKIP_GENERIC_SYNC 0 #define SYNC_START_ERROR -1 #define DO_GENERIC_SYNC 1 +#define SPUS_PER_NODE 8 +#define DEFAULT_TIMER_EXPIRE (HZ / 10) + +extern struct delayed_work spu_work; +extern int spu_prof_running; struct spu_overlay_info { /* map of sections within an SPU overlay */ unsigned int vma; /* SPU virtual memory address from elf */ @@ -62,6 +67,14 @@ struct vma_to_fileoffset_map { /* map of sections within an SPU program */ }; +struct spu_buffer { + int last_guard_val; + int ctx_sw_seen; + unsigned long *buff; + unsigned int head, tail; +}; + + /* The three functions below are for maintaining and accessing * the vma-to-fileoffset map. */ diff --git a/arch/powerpc/oprofile/cell/spu_profiler.c b/arch/powerpc/oprofile/cell/spu_profiler.c index 380d7e2..6edaebd 100644 --- a/arch/powerpc/oprofile/cell/spu_profiler.c +++ b/arch/powerpc/oprofile/cell/spu_profiler.c @@ -23,12 +23,11 @@ static u32 *samples; -static int spu_prof_running; +int spu_prof_running; static unsigned int profiling_interval; #define NUM_SPU_BITS_TRBUF 16 #define SPUS_PER_TB_ENTRY 4 -#define SPUS_PER_NODE 8 #define SPU_PC_MASK 0x @@ -208,6 +207,7 @@ int start_spu_profiling(unsigned int cycles_reset) spu_prof_running = 1; hrtimer_start(timer, kt, HRTIMER_MODE_REL); + schedule_delayed_work(spu_work, DEFAULT_TIMER_EXPIRE); return 0; } diff --git a/arch/powerpc/oprofile/cell/spu_task_sync.c b/arch/powerpc/oprofile/cell/spu_task_sync.c index 2a9b4a0..2949126 100644 --- a/arch/powerpc/oprofile/cell/spu_task_sync.c +++ b/arch/powerpc/oprofile/cell/spu_task_sync.c @@ -35,7 +35,102 @@ static DEFINE_SPINLOCK(buffer_lock); static DEFINE_SPINLOCK(cache_lock); static int num_spu_nodes; int spu_prof_num_nodes; -int last_guard_val[MAX_NUMNODES * 8]; + +struct spu_buffer spu_buff[MAX_NUMNODES * SPUS_PER_NODE]; +struct delayed_work spu_work; +static unsigned max_spu_buff; + +static void spu_buff_add(unsigned long int value, int spu) +{ + /* spu buff is a circular buffer. Add entries to the +* head. Head is the index to store the next value. +* The buffer is full when there is one available entry +* in the queue, i.e. head and tail can't be equal. +* That way we can tell the difference between the +* buffer being full versus empty. +* +* ASSUPTION: the buffer_lock is held when this function +* is called to lock the buffer, head and tail. +*/ + int full = 1; + + if (spu_buff[spu].head = spu_buff[spu].tail) { + if ((spu_buff[spu].head - spu_buff[spu].tail) + (max_spu_buff - 1)) + full = 0; + + } else if (spu_buff
Re: [Cbe-oss-dev] powerpc/cell/oprofile: fix mutex locking for spu-oprofile
On Monday 25 August 2008, Arnd Bergmann wrote: On Monday 25 August 2008, Paul Mackerras wrote: Since rc4 is out now, I understand if you feel more comfortable with putting the patch into -next instead of -merge. Linus has been getting stricter about only putting in fixes for regressions and serious bugs (see his recent email to Dave Airlie on LKML for instance). I assume that the corruption is just in the data that is supplied to userspace and doesn't extend to any kernel data structures. That's right, please queue it for -next then. I just realized that this patch never made it into powerpc-next after all, neither benh nor paulus version. Whoever is handling it today, could you please pull master.kernel.org:/pub/scm/linux/kernel/git/arnd/cell-2.6.git merge to get this commit below. I have rebased it on top of the current benh/powerpc/next branch. Thanks, Arnd --- commit aa5810fa545515c9f383e3e649bd120bef9c7f29 Author: Carl Love [EMAIL PROTECTED] Date: Fri Aug 8 15:38:36 2008 -0700 powerpc/cell/oprofile: fix mutex locking for spu-oprofile The issue is the SPU code is not holding the kernel mutex lock while adding samples to the kernel buffer. This patch creates per SPU buffers to hold the data. Data is added to the buffers from in interrupt context. The data is periodically pushed to the kernel buffer via a new Oprofile function oprofile_put_buff(). The oprofile_put_buff() function is called via a work queue enabling the funtion to acquire the mutex lock. The existing user controls for adjusting the per CPU buffer size is used to control the size of the per SPU buffers. Similarly, overflows of the SPU buffers are reported by incrementing the per CPU buffer stats. This eliminates the need to have architecture specific controls for the per SPU buffers which is not acceptable to the OProfile user tool maintainer. The export of the oprofile add_event_entry() is removed as it is no longer needed given this patch. Note, this patch has not addressed the issue of indexing arrays by the spu number. This still needs to be fixed as the spu numbering is not guarenteed to be 0 to max_num_spus-1. Signed-off-by: Carl Love [EMAIL PROTECTED] Signed-off-by: Maynard Johnson [EMAIL PROTECTED] Signed-off-by: Arnd Bergmann [EMAIL PROTECTED] Acked-by: Acked-by: Robert Richter [EMAIL PROTECTED] arch/powerpc/oprofile/cell/pr_util.h | 13 + arch/powerpc/oprofile/cell/spu_profiler.c |4 arch/powerpc/oprofile/cell/spu_task_sync.c | 236 --- drivers/oprofile/buffer_sync.c | 24 ++ drivers/oprofile/cpu_buffer.c | 15 + drivers/oprofile/event_buffer.c|2 drivers/oprofile/event_buffer.h|7 include/linux/oprofile.h | 16 + drivers/oprofile/cpu_buffer.c |4 9 files changed, 284 insertions(+), 37 deletions(-) ___ Linuxppc-dev mailing list Linuxppc-dev@ozlabs.org https://ozlabs.org/mailman/listinfo/linuxppc-dev
Re: [Cbe-oss-dev] powerpc/cell/oprofile: fix mutex locking for spu-oprofile
On 13.10.08 16:53:28, Arnd Bergmann wrote: On Monday 25 August 2008, Arnd Bergmann wrote: On Monday 25 August 2008, Paul Mackerras wrote: Since rc4 is out now, I understand if you feel more comfortable with putting the patch into -next instead of -merge. Linus has been getting stricter about only putting in fixes for regressions and serious bugs (see his recent email to Dave Airlie on LKML for instance). I assume that the corruption is just in the data that is supplied to userspace and doesn't extend to any kernel data structures. That's right, please queue it for -next then. I just realized that this patch never made it into powerpc-next after all, neither benh nor paulus version. Whoever is handling it today, could you please pull master.kernel.org:/pub/scm/linux/kernel/git/arnd/cell-2.6.git merge to get this commit below. I have rebased it on top of the current benh/powerpc/next branch. All powerpc oprofile patches are in this branch: git://git.kernel.org/pub/scm/linux/kernel/git/rric/oprofile.git powerpc-for-paul Pending patches are: Carl Love (1): powerpc/cell/oprofile: fix mutex locking for spu-oprofile Roel Kluin (1): powerpc/cell/oprofile: vma_map: fix test on overlay_tbl_offset Please pull from there. -Robert -- Advanced Micro Devices, Inc. Operating System Research Center email: [EMAIL PROTECTED] ___ Linuxppc-dev mailing list Linuxppc-dev@ozlabs.org https://ozlabs.org/mailman/listinfo/linuxppc-dev
Re: [Cbe-oss-dev] powerpc/cell/oprofile: fix mutex locking for spu-oprofile
Arnd Bergmann writes: The patch does not fix a regression, the spu-oprofile code basically never worked. With the current code in Linux, samples in the profile buffer can get corrupted because reader and writer to that buffer use different locks for accessing it. It took us several iterations to come up with a solution that does not introduce other problems and I didn't want to push an earlier version that would need more fixups. Since rc4 is out now, I understand if you feel more comfortable with putting the patch into -next instead of -merge. Linus has been getting stricter about only putting in fixes for regressions and serious bugs (see his recent email to Dave Airlie on LKML for instance). I assume that the corruption is just in the data that is supplied to userspace and doesn't extend to any kernel data structures. If it does then we have a much stronger argument for pushing this stuff for 2.6.27. Note that the second patch is trivial and fixes an oopsable condition of the kernel, so at least that should still go into 2.6.27. OK, I'll cherry-pick that one for my next batch for Linus. Paul. ___ Linuxppc-dev mailing list Linuxppc-dev@ozlabs.org https://ozlabs.org/mailman/listinfo/linuxppc-dev
Re: [Cbe-oss-dev] powerpc/cell/oprofile: fix mutex locking for spu-oprofile
On Monday 25 August 2008, Paul Mackerras wrote: Since rc4 is out now, I understand if you feel more comfortable with putting the patch into -next instead of -merge. Linus has been getting stricter about only putting in fixes for regressions and serious bugs (see his recent email to Dave Airlie on LKML for instance). I assume that the corruption is just in the data that is supplied to userspace and doesn't extend to any kernel data structures. That's right, please queue it for -next then. Note that the second patch is trivial and fixes an oopsable condition of the kernel, so at least that should still go into 2.6.27. OK, I'll cherry-pick that one for my next batch for Linus. Thanks, Arnd ___ Linuxppc-dev mailing list Linuxppc-dev@ozlabs.org https://ozlabs.org/mailman/listinfo/linuxppc-dev
Re: [Cbe-oss-dev] powerpc/cell/oprofile: fix mutex locking for spu-oprofile
On Thursday 21 August 2008, Paul Mackerras wrote: Arnd Bergmann writes: Paul, any chance we can still get this into 2.6.27? Possibly. We'll need a really good explanation for Linus as to why this is needed (what regression or serious bug this fixes) and why it is late. Can you send me something explaining that? The patch does not fix a regression, the spu-oprofile code basically never worked. With the current code in Linux, samples in the profile buffer can get corrupted because reader and writer to that buffer use different locks for accessing it. It took us several iterations to come up with a solution that does not introduce other problems and I didn't want to push an earlier version that would need more fixups. Since rc4 is out now, I understand if you feel more comfortable with putting the patch into -next instead of -merge. Note that the second patch is trivial and fixes an oopsable condition of the kernel, so at least that should still go into 2.6.27. I've added the Ack and uploaded it again for you to pull from master.kernel.org:/pub/scm/linux/kernel/git/arnd/cell-2.6.git merge Are you sure you actually managed to update that? No, but it's there now. I was missing the '-f' for git-push. Arnd ___ Linuxppc-dev mailing list Linuxppc-dev@ozlabs.org https://ozlabs.org/mailman/listinfo/linuxppc-dev
Re: [Cbe-oss-dev] powerpc/cell/oprofile: fix mutex locking for spu-oprofile
On Thu, 2008-08-21 at 10:14 +0200, Arnd Bergmann wrote: On Thursday 21 August 2008, Paul Mackerras wrote: Arnd Bergmann writes: Paul, any chance we can still get this into 2.6.27? Possibly. We'll need a really good explanation for Linus as to why this is needed (what regression or serious bug this fixes) and why it is late. Can you send me something explaining that? The patch does not fix a regression, the spu-oprofile code basically never worked. With the current code in Linux, samples in the profile buffer can get corrupted because reader and writer to that buffer use different locks for accessing it. Actually for me it worked[1] a reasonable amount of the time, enough to be useful. So the spu-oprofile code has always been broken in this way, but it's not always fatal. So the patch doesn't fix a regression, but it fixes a serious user-visible bug, which makes it legit rc4 material IMHO. [1] that was late last year, so possibly a kernel or two ago. cheers -- Michael Ellerman OzLabs, IBM Australia Development Lab wwweb: http://michael.ellerman.id.au phone: +61 2 6212 1183 (tie line 70 21183) We do not inherit the earth from our ancestors, we borrow it from our children. - S.M.A.R.T Person signature.asc Description: This is a digitally signed message part ___ Linuxppc-dev mailing list Linuxppc-dev@ozlabs.org https://ozlabs.org/mailman/listinfo/linuxppc-dev
Re: [Cbe-oss-dev] powerpc/cell/oprofile: fix mutex locking for spu-oprofile
On Thu, 2008-08-21 at 20:20 +1000, Michael Ellerman wrote: On Thu, 2008-08-21 at 10:14 +0200, Arnd Bergmann wrote: On Thursday 21 August 2008, Paul Mackerras wrote: Arnd Bergmann writes: Paul, any chance we can still get this into 2.6.27? Possibly. We'll need a really good explanation for Linus as to why this is needed (what regression or serious bug this fixes) and why it is late. Can you send me something explaining that? The patch does not fix a regression, the spu-oprofile code basically never worked. With the current code in Linux, samples in the profile buffer can get corrupted because reader and writer to that buffer use different locks for accessing it. Actually for me it worked[1] a reasonable amount of the time, enough to be useful. So the spu-oprofile code has always been broken in this way, but it's not always fatal. So the patch doesn't fix a regression, but it fixes a serious user-visible bug, which makes it legit rc4 material IMHO. [1] that was late last year, so possibly a kernel or two ago. The bug came in the original OProfile SPU support that was put out about 2 years ago. The way the code was there was a window in which you may get corruption. It was not until Jan 08 when we got the first report of the bug from Michael and identified it. Since then there have been three or four more people who have hit and reported the bug. I am seeing the bug show up more frequently with the latest couple of weekly SDK 3.1 kernels. It would seem that the kernel may have changed such that the timing is more likely to hit the bug. For the Beta SDK 3.1 release the IVT team was not able to complete their OProfile testing due to the bug. cheers - This SF.Net email is sponsored by the Moblin Your Move Developer's challenge Build the coolest Linux based applications with Moblin SDK win great prizes Grand prize is a trip for two to an Open Source event anywhere in the world http://moblin-contest.org/redirect.php?banner_id=100url=/ ___ oprofile-list mailing list [EMAIL PROTECTED] https://lists.sourceforge.net/lists/listinfo/oprofile-list ___ Linuxppc-dev mailing list Linuxppc-dev@ozlabs.org https://ozlabs.org/mailman/listinfo/linuxppc-dev
Re: powerpc/cell/oprofile: fix mutex locking for spu-oprofile
I am fine with the changes with the exception of removing add_event_entry() from include/linux/oprofile.h. Though there is no usage of the function also in other architectures anymore, this change in the API should be discussed on the oprofile mailing list. Please separate the change in a different patch and submit it to the mailing list. If there are no objections then, this change can go upstream as well. -Robert On 11.08.08 09:25:07, Arnd Bergmann wrote: From: Carl Love [EMAIL PROTECTED] The issue is the SPU code is not holding the kernel mutex lock while adding samples to the kernel buffer. This patch creates per SPU buffers to hold the data. Data is added to the buffers from in interrupt context. The data is periodically pushed to the kernel buffer via a new Oprofile function oprofile_put_buff(). The oprofile_put_buff() function is called via a work queue enabling the funtion to acquire the mutex lock. The existing user controls for adjusting the per CPU buffer size is used to control the size of the per SPU buffers. Similarly, overflows of the SPU buffers are reported by incrementing the per CPU buffer stats. This eliminates the need to have architecture specific controls for the per SPU buffers which is not acceptable to the OProfile user tool maintainer. The export of the oprofile add_event_entry() is removed as it is no longer needed given this patch. Note, this patch has not addressed the issue of indexing arrays by the spu number. This still needs to be fixed as the spu numbering is not guarenteed to be 0 to max_num_spus-1. Signed-off-by: Carl Love [EMAIL PROTECTED] Signed-off-by: Maynard Johnson [EMAIL PROTECTED] Signed-off-by: Arnd Bergmann [EMAIL PROTECTED] --- arch/powerpc/oprofile/cell/pr_util.h | 13 ++ arch/powerpc/oprofile/cell/spu_profiler.c |4 +- arch/powerpc/oprofile/cell/spu_task_sync.c | 236 +--- drivers/oprofile/buffer_sync.c | 24 +++ drivers/oprofile/cpu_buffer.c | 15 ++- drivers/oprofile/event_buffer.h|7 + include/linux/oprofile.h | 16 +- 7 files changed, 279 insertions(+), 36 deletions(-) diff --git a/arch/powerpc/oprofile/cell/pr_util.h b/arch/powerpc/oprofile/cell/pr_util.h index 22e4e8d..628009c 100644 --- a/arch/powerpc/oprofile/cell/pr_util.h +++ b/arch/powerpc/oprofile/cell/pr_util.h @@ -24,6 +24,11 @@ #define SKIP_GENERIC_SYNC 0 #define SYNC_START_ERROR -1 #define DO_GENERIC_SYNC 1 +#define SPUS_PER_NODE 8 +#define DEFAULT_TIMER_EXPIRE (HZ / 10) + +extern struct delayed_work spu_work; +extern int spu_prof_running; struct spu_overlay_info {/* map of sections within an SPU overlay */ unsigned int vma; /* SPU virtual memory address from elf */ @@ -62,6 +67,14 @@ struct vma_to_fileoffset_map { /* map of sections within an SPU program */ }; +struct spu_buffer { + int last_guard_val; + int ctx_sw_seen; + unsigned long *buff; + unsigned int head, tail; +}; + + /* The three functions below are for maintaining and accessing * the vma-to-fileoffset map. */ diff --git a/arch/powerpc/oprofile/cell/spu_profiler.c b/arch/powerpc/oprofile/cell/spu_profiler.c index 380d7e2..6edaebd 100644 --- a/arch/powerpc/oprofile/cell/spu_profiler.c +++ b/arch/powerpc/oprofile/cell/spu_profiler.c @@ -23,12 +23,11 @@ static u32 *samples; -static int spu_prof_running; +int spu_prof_running; static unsigned int profiling_interval; #define NUM_SPU_BITS_TRBUF 16 #define SPUS_PER_TB_ENTRY 4 -#define SPUS_PER_NODE 8 #define SPU_PC_MASK 0x @@ -208,6 +207,7 @@ int start_spu_profiling(unsigned int cycles_reset) spu_prof_running = 1; hrtimer_start(timer, kt, HRTIMER_MODE_REL); + schedule_delayed_work(spu_work, DEFAULT_TIMER_EXPIRE); return 0; } diff --git a/arch/powerpc/oprofile/cell/spu_task_sync.c b/arch/powerpc/oprofile/cell/spu_task_sync.c index 2a9b4a0..2949126 100644 --- a/arch/powerpc/oprofile/cell/spu_task_sync.c +++ b/arch/powerpc/oprofile/cell/spu_task_sync.c @@ -35,7 +35,102 @@ static DEFINE_SPINLOCK(buffer_lock); static DEFINE_SPINLOCK(cache_lock); static int num_spu_nodes; int spu_prof_num_nodes; -int last_guard_val[MAX_NUMNODES * 8]; + +struct spu_buffer spu_buff[MAX_NUMNODES * SPUS_PER_NODE]; +struct delayed_work spu_work; +static unsigned max_spu_buff; + +static void spu_buff_add(unsigned long int value, int spu) +{ + /* spu buff is a circular buffer. Add entries to the + * head. Head is the index to store the next value. + * The buffer is full when there is one available entry + * in the queue, i.e. head and tail can't be equal. + * That way we can tell the difference between the + * buffer being full versus empty. + * + * ASSUPTION: the buffer_lock is held when this function +
Re: powerpc/cell/oprofile: fix mutex locking for spu-oprofile
On Wednesday 20 August 2008, Robert Richter wrote: I am fine with the changes with the exception of removing add_event_entry() from include/linux/oprofile.h. Though there is no usage of the function also in other architectures anymore, this change in the API should be discussed on the oprofile mailing list. Please separate the change in a different patch and submit it to the mailing list. If there are no objections then, this change can go upstream as well. As an explanation, the removal of add_event_entry is the whole point of this patch. add_event_entry must only be called with buffer_mutex held, but buffer_mutex itself is not exported. I'm pretty sure that no other user of add_event_entry exists, as it was exported specifically for the SPU support and that never worked. Any other (theoretical) code using it would be broken in the same way and need a corresponding fix. We can easily leave the declaration in place, but I'd recommend removing it eventually. If you prefer to keep it, how about marking it as __deprecated? Arnd ___ Linuxppc-dev mailing list Linuxppc-dev@ozlabs.org https://ozlabs.org/mailman/listinfo/linuxppc-dev
Re: powerpc/cell/oprofile: fix mutex locking for spu-oprofile
On 20.08.08 14:05:31, Arnd Bergmann wrote: On Wednesday 20 August 2008, Robert Richter wrote: I am fine with the changes with the exception of removing add_event_entry() from include/linux/oprofile.h. Though there is no usage of the function also in other architectures anymore, this change in the API should be discussed on the oprofile mailing list. Please separate the change in a different patch and submit it to the mailing list. If there are no objections then, this change can go upstream as well. As an explanation, the removal of add_event_entry is the whole point of this patch. add_event_entry must only be called with buffer_mutex held, but buffer_mutex itself is not exported. Thanks for pointing this out. I'm pretty sure that no other user of add_event_entry exists, as it was exported specifically for the SPU support and that never worked. Any other (theoretical) code using it would be broken in the same way and need a corresponding fix. We can easily leave the declaration in place, but I'd recommend removing it eventually. If you prefer to keep it, how about marking it as __deprecated? No, since this is broken by design we remove it. The patch can go upstream as it is. Thanks, -Robert -- Advanced Micro Devices, Inc. Operating System Research Center email: [EMAIL PROTECTED] ___ Linuxppc-dev mailing list Linuxppc-dev@ozlabs.org https://ozlabs.org/mailman/listinfo/linuxppc-dev
Re: powerpc/cell/oprofile: fix mutex locking for spu-oprofile
On 11.08.08 09:25:07, Arnd Bergmann wrote: From: Carl Love [EMAIL PROTECTED] The issue is the SPU code is not holding the kernel mutex lock while adding samples to the kernel buffer. This patch creates per SPU buffers to hold the data. Data is added to the buffers from in interrupt context. The data is periodically pushed to the kernel buffer via a new Oprofile function oprofile_put_buff(). The oprofile_put_buff() function is called via a work queue enabling the funtion to acquire the mutex lock. The existing user controls for adjusting the per CPU buffer size is used to control the size of the per SPU buffers. Similarly, overflows of the SPU buffers are reported by incrementing the per CPU buffer stats. This eliminates the need to have architecture specific controls for the per SPU buffers which is not acceptable to the OProfile user tool maintainer. The export of the oprofile add_event_entry() is removed as it is no longer needed given this patch. Note, this patch has not addressed the issue of indexing arrays by the spu number. This still needs to be fixed as the spu numbering is not guarenteed to be 0 to max_num_spus-1. Signed-off-by: Carl Love [EMAIL PROTECTED] Signed-off-by: Maynard Johnson [EMAIL PROTECTED] Signed-off-by: Arnd Bergmann [EMAIL PROTECTED] Acked-by: Robert Richter [EMAIL PROTECTED] -Robert -- Advanced Micro Devices, Inc. Operating System Research Center email: [EMAIL PROTECTED] ___ Linuxppc-dev mailing list Linuxppc-dev@ozlabs.org https://ozlabs.org/mailman/listinfo/linuxppc-dev
Re: powerpc/cell/oprofile: fix mutex locking for spu-oprofile
On Wednesday 20 August 2008, Robert Richter wrote: Signed-off-by: Carl Love [EMAIL PROTECTED] Signed-off-by: Maynard Johnson [EMAIL PROTECTED] Signed-off-by: Arnd Bergmann [EMAIL PROTECTED] Acked-by: Robert Richter [EMAIL PROTECTED] Thanks Robert. Paul, any chance we can still get this into 2.6.27? I've added the Ack and uploaded it again for you to pull from master.kernel.org:/pub/scm/linux/kernel/git/arnd/cell-2.6.git merge Arnd ___ Linuxppc-dev mailing list Linuxppc-dev@ozlabs.org https://ozlabs.org/mailman/listinfo/linuxppc-dev
Re: powerpc/cell/oprofile: fix mutex locking for spu-oprofile
On Wed, 2008-08-20 at 14:39 +0200, Robert Richter wrote: On 20.08.08 14:05:31, Arnd Bergmann wrote: On Wednesday 20 August 2008, Robert Richter wrote: I am fine with the changes with the exception of removing add_event_entry() from include/linux/oprofile.h. Though there is no usage of the function also in other architectures anymore, this change in the API should be discussed on the oprofile mailing list. Please separate the change in a different patch and submit it to the mailing list. If there are no objections then, this change can go upstream as well. As an explanation, the removal of add_event_entry is the whole point of this patch. add_event_entry must only be called with buffer_mutex held, but buffer_mutex itself is not exported. Thanks for pointing this out. We originally added add_event_entry() to include/linux/oprofile.h specifically because it was needed for the CELL SPU support. As it turns out it the approach was not completely thought through. We were using the function call without holding the mutex lock. As we discovered later, this can result in corrupting the data put into the event buffer. So exposing the function without a way to hold the mutex lock is actually a really bad idea as it would encourage others to fall into the same mistake that we made. So, as Arnd said, the whole point of this patch is to come up with a correct approach to adding the data. I'm pretty sure that no other user of add_event_entry exists, as it was exported specifically for the SPU support and that never worked. Any other (theoretical) code using it would be broken in the same way and need a corresponding fix. We can easily leave the declaration in place, but I'd recommend removing it eventually. If you prefer to keep it, how about marking it as __deprecated? No, since this is broken by design we remove it. The patch can go upstream as it is. Thanks, -Robert It really is best to remove it. Thank you for taking the time to review and comment on the patch. Carl Love ___ Linuxppc-dev mailing list Linuxppc-dev@ozlabs.org https://ozlabs.org/mailman/listinfo/linuxppc-dev
Re: powerpc/cell/oprofile: fix mutex locking for spu-oprofile
Arnd Bergmann writes: Paul, any chance we can still get this into 2.6.27? Possibly. We'll need a really good explanation for Linus as to why this is needed (what regression or serious bug this fixes) and why it is late. Can you send me something explaining that? I've added the Ack and uploaded it again for you to pull from master.kernel.org:/pub/scm/linux/kernel/git/arnd/cell-2.6.git merge Are you sure you actually managed to update that? $ git ls-remote master.kernel.org:/pub/scm/linux/kernel/git/arnd/cell-2.6.git merge f90a87b5f5fa46dc6c556e9267a6f25a95fbef14refs/heads/merge and f90a87b5f5fa46dc6c556e9267a6f25a95fbef14 is the same commit from 2008-08-09 that you asked me to pull previously. Paul. ___ Linuxppc-dev mailing list Linuxppc-dev@ozlabs.org https://ozlabs.org/mailman/listinfo/linuxppc-dev
powerpc/cell/oprofile: fix mutex locking for spu-oprofile
From: Carl Love [EMAIL PROTECTED] The issue is the SPU code is not holding the kernel mutex lock while adding samples to the kernel buffer. This patch creates per SPU buffers to hold the data. Data is added to the buffers from in interrupt context. The data is periodically pushed to the kernel buffer via a new Oprofile function oprofile_put_buff(). The oprofile_put_buff() function is called via a work queue enabling the funtion to acquire the mutex lock. The existing user controls for adjusting the per CPU buffer size is used to control the size of the per SPU buffers. Similarly, overflows of the SPU buffers are reported by incrementing the per CPU buffer stats. This eliminates the need to have architecture specific controls for the per SPU buffers which is not acceptable to the OProfile user tool maintainer. The export of the oprofile add_event_entry() is removed as it is no longer needed given this patch. Note, this patch has not addressed the issue of indexing arrays by the spu number. This still needs to be fixed as the spu numbering is not guarenteed to be 0 to max_num_spus-1. Signed-off-by: Carl Love [EMAIL PROTECTED] Signed-off-by: Maynard Johnson [EMAIL PROTECTED] Signed-off-by: Arnd Bergmann [EMAIL PROTECTED] --- arch/powerpc/oprofile/cell/pr_util.h | 13 ++ arch/powerpc/oprofile/cell/spu_profiler.c |4 +- arch/powerpc/oprofile/cell/spu_task_sync.c | 236 +--- drivers/oprofile/buffer_sync.c | 24 +++ drivers/oprofile/cpu_buffer.c | 15 ++- drivers/oprofile/event_buffer.h|7 + include/linux/oprofile.h | 16 +- 7 files changed, 279 insertions(+), 36 deletions(-) diff --git a/arch/powerpc/oprofile/cell/pr_util.h b/arch/powerpc/oprofile/cell/pr_util.h index 22e4e8d..628009c 100644 --- a/arch/powerpc/oprofile/cell/pr_util.h +++ b/arch/powerpc/oprofile/cell/pr_util.h @@ -24,6 +24,11 @@ #define SKIP_GENERIC_SYNC 0 #define SYNC_START_ERROR -1 #define DO_GENERIC_SYNC 1 +#define SPUS_PER_NODE 8 +#define DEFAULT_TIMER_EXPIRE (HZ / 10) + +extern struct delayed_work spu_work; +extern int spu_prof_running; struct spu_overlay_info { /* map of sections within an SPU overlay */ unsigned int vma; /* SPU virtual memory address from elf */ @@ -62,6 +67,14 @@ struct vma_to_fileoffset_map { /* map of sections within an SPU program */ }; +struct spu_buffer { + int last_guard_val; + int ctx_sw_seen; + unsigned long *buff; + unsigned int head, tail; +}; + + /* The three functions below are for maintaining and accessing * the vma-to-fileoffset map. */ diff --git a/arch/powerpc/oprofile/cell/spu_profiler.c b/arch/powerpc/oprofile/cell/spu_profiler.c index 380d7e2..6edaebd 100644 --- a/arch/powerpc/oprofile/cell/spu_profiler.c +++ b/arch/powerpc/oprofile/cell/spu_profiler.c @@ -23,12 +23,11 @@ static u32 *samples; -static int spu_prof_running; +int spu_prof_running; static unsigned int profiling_interval; #define NUM_SPU_BITS_TRBUF 16 #define SPUS_PER_TB_ENTRY 4 -#define SPUS_PER_NODE 8 #define SPU_PC_MASK 0x @@ -208,6 +207,7 @@ int start_spu_profiling(unsigned int cycles_reset) spu_prof_running = 1; hrtimer_start(timer, kt, HRTIMER_MODE_REL); + schedule_delayed_work(spu_work, DEFAULT_TIMER_EXPIRE); return 0; } diff --git a/arch/powerpc/oprofile/cell/spu_task_sync.c b/arch/powerpc/oprofile/cell/spu_task_sync.c index 2a9b4a0..2949126 100644 --- a/arch/powerpc/oprofile/cell/spu_task_sync.c +++ b/arch/powerpc/oprofile/cell/spu_task_sync.c @@ -35,7 +35,102 @@ static DEFINE_SPINLOCK(buffer_lock); static DEFINE_SPINLOCK(cache_lock); static int num_spu_nodes; int spu_prof_num_nodes; -int last_guard_val[MAX_NUMNODES * 8]; + +struct spu_buffer spu_buff[MAX_NUMNODES * SPUS_PER_NODE]; +struct delayed_work spu_work; +static unsigned max_spu_buff; + +static void spu_buff_add(unsigned long int value, int spu) +{ + /* spu buff is a circular buffer. Add entries to the +* head. Head is the index to store the next value. +* The buffer is full when there is one available entry +* in the queue, i.e. head and tail can't be equal. +* That way we can tell the difference between the +* buffer being full versus empty. +* +* ASSUPTION: the buffer_lock is held when this function +* is called to lock the buffer, head and tail. +*/ + int full = 1; + + if (spu_buff[spu].head = spu_buff[spu].tail) { + if ((spu_buff[spu].head - spu_buff[spu].tail) + (max_spu_buff - 1)) + full = 0; + + } else if (spu_buff[spu].tail spu_buff[spu].head) { + if ((spu_buff[spu].tail - spu_buff[spu].head) +1) + full = 0; + } + + if (!full) { +