Re: [PATCH v3 41/44] metag: OProfile
On 01/10/2013 09:31 AM, James Hogan wrote: > Add oprofile support for metag. > > Signed-off-by: James Hogan > Cc: Robert Richter > Cc: oprofile-l...@lists.sf.net > --- > arch/metag/Kconfig|1 + > arch/metag/Makefile |2 + > arch/metag/oprofile/Makefile | 16 ++ > arch/metag/oprofile/backtrace.c | 134 ++ > arch/metag/oprofile/backtrace.h |6 + > arch/metag/oprofile/op_model_meta12.c | 242 > + > 6 files changed, 401 insertions(+), 0 deletions(-) > create mode 100644 arch/metag/oprofile/Makefile > create mode 100644 arch/metag/oprofile/backtrace.c > create mode 100644 arch/metag/oprofile/backtrace.h > create mode 100644 arch/metag/oprofile/op_model_meta12.c > > diff --git a/arch/metag/Kconfig b/arch/metag/Kconfig > index e2235e3..98433e5 100644 > --- a/arch/metag/Kconfig > +++ b/arch/metag/Kconfig > @@ -25,6 +25,7 @@ config METAG > select HAVE_MEMBLOCK > select HAVE_MEMBLOCK_NODE_MAP > select HAVE_MOD_ARCH_SPECIFIC > + select HAVE_OPROFILE > select HAVE_PERF_EVENTS > select HAVE_SYSCALL_TRACEPOINTS > select IRQ_DOMAIN > diff --git a/arch/metag/Makefile b/arch/metag/Makefile > index d455140..53fc094 100644 > --- a/arch/metag/Makefile > +++ b/arch/metag/Makefile > @@ -49,6 +49,8 @@ core-y += > arch/metag/mm/ > libs-y += arch/metag/lib/ > libs-y += arch/metag/tbx/ > > +drivers-$(CONFIG_OPROFILE) += arch/metag/oprofile/ > + > boot := arch/metag/boot > > boot_targets += uImage > diff --git a/arch/metag/oprofile/Makefile b/arch/metag/oprofile/Makefile > new file mode 100644 > index 000..4b4ceee > --- /dev/null > +++ b/arch/metag/oprofile/Makefile > @@ -0,0 +1,16 @@ > +obj-y+= oprofile.o > + > +oprofile-core-y += buffer_sync.o > +oprofile-core-y += cpu_buffer.o > +oprofile-core-y += event_buffer.o > +oprofile-core-y += oprof.o > +oprofile-core-y += oprofile_files.o > +oprofile-core-y += oprofile_stats.o > +oprofile-core-y += oprofilefs.o > +oprofile-core-y += timer_int.o > + > +oprofile-y += backtrace.o > +oprofile-y += op_model_meta12.o > +oprofile-y += $(addprefix 
../../../drivers/oprofile/,$(oprofile-core-y)) > + > +ccflags-y+= -Werror > diff --git a/arch/metag/oprofile/backtrace.c b/arch/metag/oprofile/backtrace.c > new file mode 100644 > index 000..0ae7489 > --- /dev/null > +++ b/arch/metag/oprofile/backtrace.c > @@ -0,0 +1,134 @@ > +/* > + * Copyright (C) 2010 Imagination Technologies Ltd. > + * > + * This file is subject to the terms and conditions of the GNU General Public > + * License. See the file "COPYING" in the main directory of this archive > + * for more details. > + */ > + > +#include > +#include > +#include > +#include > +#include > +#include "backtrace.h" > + > +#ifdef CONFIG_FRAME_POINTER > + > +#ifdef CONFIG_KALLSYMS > +#include > +#include > + > +static unsigned long tbi_boing_addr; > +static unsigned long tbi_boing_size; > +#endif > + > +static void user_backtrace_fp(unsigned long __user *fp, unsigned int depth) > +{ > + while (depth-- && access_ok(VERIFY_READ, fp, 8)) { > + unsigned long addr; > + unsigned long __user *fpnew; > + if (__copy_from_user_inatomic(, fp + 1, sizeof(addr))) > + break; > + addr -= 4; > + > + oprofile_add_trace(addr); > + > + /* stack grows up, so frame pointers must decrease */ > + if (__copy_from_user_inatomic(, fp + 0, sizeof(fpnew))) > + break; > + if (fpnew > fp) > + break; > + fp = fpnew; > + } > +} > + > +static void kernel_backtrace_fp(unsigned long *fp, unsigned long *stack, > + unsigned int depth) > +{ > +#ifdef CONFIG_KALLSYMS > + /* We need to know where TBIBoingVec is and it's size */ > + if (!tbi_boing_addr) { > + unsigned long size; > + unsigned long offset; > + char modname[MODULE_NAME_LEN]; > + char name[KSYM_NAME_LEN]; > + tbi_boing_addr = kallsyms_lookup_name("___TBIBoingVec"); > + if (!tbi_boing_addr) > + tbi_boing_addr = 1; > + else if (!lookup_symbol_attrs(tbi_boing_addr, , > + , modname, name)) > + tbi_boing_size = size; > + } > +#endif > + /* detect when the frame pointer has been used for other purposes and > + * doesn't point to the stack (it may point 
completely elsewhere which > + * kstack_end may not detect). > + */ > + while (depth-- && fp >= stack && fp + 8 <= stack + THREAD_SIZE) { > +
Re: [PATCH v3 41/44] metag: OProfile
On 01/10/2013 09:31 AM, James Hogan wrote: Add oprofile support for metag. Signed-off-by: James Hogan james.ho...@imgtec.com Cc: Robert Richter r...@kernel.org Cc: oprofile-l...@lists.sf.net --- arch/metag/Kconfig|1 + arch/metag/Makefile |2 + arch/metag/oprofile/Makefile | 16 ++ arch/metag/oprofile/backtrace.c | 134 ++ arch/metag/oprofile/backtrace.h |6 + arch/metag/oprofile/op_model_meta12.c | 242 + 6 files changed, 401 insertions(+), 0 deletions(-) create mode 100644 arch/metag/oprofile/Makefile create mode 100644 arch/metag/oprofile/backtrace.c create mode 100644 arch/metag/oprofile/backtrace.h create mode 100644 arch/metag/oprofile/op_model_meta12.c diff --git a/arch/metag/Kconfig b/arch/metag/Kconfig index e2235e3..98433e5 100644 --- a/arch/metag/Kconfig +++ b/arch/metag/Kconfig @@ -25,6 +25,7 @@ config METAG select HAVE_MEMBLOCK select HAVE_MEMBLOCK_NODE_MAP select HAVE_MOD_ARCH_SPECIFIC + select HAVE_OPROFILE select HAVE_PERF_EVENTS select HAVE_SYSCALL_TRACEPOINTS select IRQ_DOMAIN diff --git a/arch/metag/Makefile b/arch/metag/Makefile index d455140..53fc094 100644 --- a/arch/metag/Makefile +++ b/arch/metag/Makefile @@ -49,6 +49,8 @@ core-y += arch/metag/mm/ libs-y += arch/metag/lib/ libs-y += arch/metag/tbx/ +drivers-$(CONFIG_OPROFILE) += arch/metag/oprofile/ + boot := arch/metag/boot boot_targets += uImage diff --git a/arch/metag/oprofile/Makefile b/arch/metag/oprofile/Makefile new file mode 100644 index 000..4b4ceee --- /dev/null +++ b/arch/metag/oprofile/Makefile @@ -0,0 +1,16 @@ +obj-y+= oprofile.o + +oprofile-core-y += buffer_sync.o +oprofile-core-y += cpu_buffer.o +oprofile-core-y += event_buffer.o +oprofile-core-y += oprof.o +oprofile-core-y += oprofile_files.o +oprofile-core-y += oprofile_stats.o +oprofile-core-y += oprofilefs.o +oprofile-core-y += timer_int.o + +oprofile-y += backtrace.o +oprofile-y += op_model_meta12.o +oprofile-y += $(addprefix ../../../drivers/oprofile/,$(oprofile-core-y)) + +ccflags-y+= -Werror diff --git 
a/arch/metag/oprofile/backtrace.c b/arch/metag/oprofile/backtrace.c new file mode 100644 index 000..0ae7489 --- /dev/null +++ b/arch/metag/oprofile/backtrace.c @@ -0,0 +1,134 @@ +/* + * Copyright (C) 2010 Imagination Technologies Ltd. + * + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file COPYING in the main directory of this archive + * for more details. + */ + +#include linux/oprofile.h +#include linux/sched.h +#include linux/mm.h +#include linux/io.h +#include linux/uaccess.h +#include backtrace.h + +#ifdef CONFIG_FRAME_POINTER + +#ifdef CONFIG_KALLSYMS +#include linux/kallsyms.h +#include linux/module.h + +static unsigned long tbi_boing_addr; +static unsigned long tbi_boing_size; +#endif + +static void user_backtrace_fp(unsigned long __user *fp, unsigned int depth) +{ + while (depth-- access_ok(VERIFY_READ, fp, 8)) { + unsigned long addr; + unsigned long __user *fpnew; + if (__copy_from_user_inatomic(addr, fp + 1, sizeof(addr))) + break; + addr -= 4; + + oprofile_add_trace(addr); + + /* stack grows up, so frame pointers must decrease */ + if (__copy_from_user_inatomic(fpnew, fp + 0, sizeof(fpnew))) + break; + if (fpnew fp) + break; + fp = fpnew; + } +} + +static void kernel_backtrace_fp(unsigned long *fp, unsigned long *stack, + unsigned int depth) +{ +#ifdef CONFIG_KALLSYMS + /* We need to know where TBIBoingVec is and it's size */ + if (!tbi_boing_addr) { + unsigned long size; + unsigned long offset; + char modname[MODULE_NAME_LEN]; + char name[KSYM_NAME_LEN]; + tbi_boing_addr = kallsyms_lookup_name(___TBIBoingVec); + if (!tbi_boing_addr) + tbi_boing_addr = 1; + else if (!lookup_symbol_attrs(tbi_boing_addr, size, + offset, modname, name)) + tbi_boing_size = size; + } +#endif + /* detect when the frame pointer has been used for other purposes and + * doesn't point to the stack (it may point completely elsewhere which + * kstack_end may not detect). 
+ */ + while (depth-- && fp >= stack && fp + 8 <= stack + THREAD_SIZE) { + unsigned
Re: [Cbe-oss-dev] [RFC, PATCH] CELL Oprofile SPU profiling updated patch
I have applied the "cleanup" patch that Arnd sent, but had to fix up a few things: - Bug fix: Initialize retval in spu_task_sync.c, line 95, otherwise OProfile this function returns non-zero and OProfile fails. - Remove unused codes in include/linux/oprofile.h - Compile warnings: Initialize offset and spu_cookie at lines 283 and 284 in spu_task_sync.c With these changes and some userspace changes that were necessary to correspond with Arnd's changes, our testing was successful. A fixup patch is attached. P.S. We have a single patch with all these changes applied if anyone would like us to post it. -Maynard Arnd Bergmann wrote: On Thursday 22 February 2007, Carl Love wrote: This patch updates the existing arch/powerpc/oprofile/op_model_cell.c to add in the SPU profiling capabilities. In addition, a 'cell' subdirectory was added to arch/powerpc/oprofile to hold Cell-specific SPU profiling code. There was a significant amount of whitespace breakage in this patch, which I cleaned up. The patch below consists of the other things I changed as a further cleanup. Note that I changed the format of the context switch record, which I found too complicated, as I described on IRC last week. 
Arnd <>< diff -paur linux-orig/arch/powerpc/oprofile/cell/spu_task_sync.c linux-new/arch/powerpc/oprofile/cell/spu_task_sync.c --- linux-orig/arch/powerpc/oprofile/cell/spu_task_sync.c 2007-02-27 17:10:24.0 -0600 +++ linux-new/arch/powerpc/oprofile/cell/spu_task_sync.c 2007-02-27 17:08:57.0 -0600 @@ -92,7 +92,7 @@ prepare_cached_spu_info(struct spu * spu { unsigned long flags; struct vma_to_fileoffset_map * new_map; - int retval; + int retval = 0; struct cached_info * info; /* We won't bother getting cache_lock here since @@ -280,8 +280,8 @@ static int process_context_switch(struct { unsigned long flags; int retval; - unsigned int offset; - unsigned long spu_cookie, app_dcookie; + unsigned int offset = 0; + unsigned long spu_cookie = 0, app_dcookie; retval = prepare_cached_spu_info(spu, objectId); if (retval) goto out; diff -paur linux-orig/include/linux/oprofile.h linux-new/include/linux/oprofile.h --- linux-orig/include/linux/oprofile.h 2007-02-27 14:41:29.0 -0600 +++ linux-new/include/linux/oprofile.h 2007-02-27 14:43:18.0 -0600 @@ -36,9 +36,6 @@ #define XEN_ENTER_SWITCH_CODE 10 #define SPU_PROFILING_CODE 11 #define SPU_CTX_SWITCH_CODE12 -#define SPU_OFFSET_CODE13 -#define SPU_COOKIE_CODE14 -#define SPU_SHLIB_COOKIE_CODE 15 struct super_block; struct dentry;
Re: [Cbe-oss-dev] [RFC, PATCH] CELL Oprofile SPU profiling updated patch
I have applied the cleanup patch that Arnd sent, but had to fix up a few things: - Bug fix: Initialize retval in spu_task_sync.c, line 95, otherwise OProfile this function returns non-zero and OProfile fails. - Remove unused codes in include/linux/oprofile.h - Compile warnings: Initialize offset and spu_cookie at lines 283 and 284 in spu_task_sync.c With these changes and some userspace changes that were necessary to correspond with Arnd's changes, our testing was successful. A fixup patch is attached. P.S. We have a single patch with all these changes applied if anyone would like us to post it. -Maynard Arnd Bergmann wrote: On Thursday 22 February 2007, Carl Love wrote: This patch updates the existing arch/powerpc/oprofile/op_model_cell.c to add in the SPU profiling capabilities. In addition, a 'cell' subdirectory was added to arch/powerpc/oprofile to hold Cell-specific SPU profiling code. There was a significant amount of whitespace breakage in this patch, which I cleaned up. The patch below consists of the other things I changed as a further cleanup. Note that I changed the format of the context switch record, which I found too complicated, as I described on IRC last week. 
Arnd diff -paur linux-orig/arch/powerpc/oprofile/cell/spu_task_sync.c linux-new/arch/powerpc/oprofile/cell/spu_task_sync.c --- linux-orig/arch/powerpc/oprofile/cell/spu_task_sync.c 2007-02-27 17:10:24.0 -0600 +++ linux-new/arch/powerpc/oprofile/cell/spu_task_sync.c 2007-02-27 17:08:57.0 -0600 @@ -92,7 +92,7 @@ prepare_cached_spu_info(struct spu * spu { unsigned long flags; struct vma_to_fileoffset_map * new_map; - int retval; + int retval = 0; struct cached_info * info; /* We won't bother getting cache_lock here since @@ -280,8 +280,8 @@ static int process_context_switch(struct { unsigned long flags; int retval; - unsigned int offset; - unsigned long spu_cookie, app_dcookie; + unsigned int offset = 0; + unsigned long spu_cookie = 0, app_dcookie; retval = prepare_cached_spu_info(spu, objectId); if (retval) goto out; diff -paur linux-orig/include/linux/oprofile.h linux-new/include/linux/oprofile.h --- linux-orig/include/linux/oprofile.h 2007-02-27 14:41:29.0 -0600 +++ linux-new/include/linux/oprofile.h 2007-02-27 14:43:18.0 -0600 @@ -36,9 +36,6 @@ #define XEN_ENTER_SWITCH_CODE 10 #define SPU_PROFILING_CODE 11 #define SPU_CTX_SWITCH_CODE12 -#define SPU_OFFSET_CODE13 -#define SPU_COOKIE_CODE14 -#define SPU_SHLIB_COOKIE_CODE 15 struct super_block; struct dentry;
Re: [Cbe-oss-dev] [RFC, PATCH] CELL Oprofile SPU profiling updated patch
Maynard Johnson wrote: Arnd Bergmann wrote: On Friday 16 February 2007 01:32, Maynard Johnson wrote: config OPROFILE_CELL bool "OProfile for Cell Broadband Engine" depends on OPROFILE && SPU_FS default y if ((SPU_FS = y && OPROFILE = y) || (SPU_FS = m && OPROFILE = m)) help Profiling of Cell BE SPUs requires special support enabled by this option. Both SPU_FS and OPROFILE options must be set 'y' or both be set 'm'. = Can anyone see a problem with any of this . . . or perhaps a suggestion of a better way? The text suggests it doesn't allow SPU_FS=y with OPROFILE=m, which I think should be allowed. Right, good catch. I'll add another OR to the 'default y' and correct the text. Actually, it makes more sense to do the following: config OPROFILE_CELL bool "OProfile for Cell Broadband Engine" depends on (SPU_FS = y && OPROFILE = m) || (SPU_FS = y && OPROFILE = y) || (SPU_FS = m && OPROFILE = m) default y help Profiling of Cell BE SPUs requires special support enabled by this option. > I also don't see any place in the code where you actually use CONFIG_OPROFILE_CELL. As I mentioned, I will use CONFIG_OPROFILE_CELL in the arch/powerpc/oprofile/Makefile as follows: oprofile-$(CONFIG_OPROFILE_CELL) += op_model_cell.o \ cell/spu_profiler.o cell/vma_map.o cell/spu_task_sync.o [snip] Arnd <>< ___ Linuxppc-dev mailing list [EMAIL PROTECTED] https://ozlabs.org/mailman/listinfo/linuxppc-dev - To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [Cbe-oss-dev] [RFC, PATCH] CELL Oprofile SPU profiling updated patch
Maynard Johnson wrote: Arnd Bergmann wrote: On Friday 16 February 2007 01:32, Maynard Johnson wrote: config OPROFILE_CELL bool OProfile for Cell Broadband Engine depends on OPROFILE SPU_FS default y if ((SPU_FS = y OPROFILE = y) || (SPU_FS = m OPROFILE = m)) help Profiling of Cell BE SPUs requires special support enabled by this option. Both SPU_FS and OPROFILE options must be set 'y' or both be set 'm'. = Can anyone see a problem with any of this . . . or perhaps a suggestion of a better way? The text suggests it doesn't allow SPU_FS=y with OPROFILE=m, which I think should be allowed. Right, good catch. I'll add another OR to the 'default y' and correct the text. Actually, it makes more sense to do the following: config OPROFILE_CELL bool OProfile for Cell Broadband Engine depends on (SPU_FS = y OPROFILE = m) || (SPU_FS = y OPROFILE = y) || (SPU_FS = m OPROFILE = m) default y help Profiling of Cell BE SPUs requires special support enabled by this option. I also don't see any place in the code where you actually use CONFIG_OPROFILE_CELL. As I mentioned, I will use CONFIG_OPROFILE_CELL in the arch/powerpc/oprofile/Makefile as follows: oprofile-$(CONFIG_OPROFILE_CELL) += op_model_cell.o \ cell/spu_profiler.o cell/vma_map.o cell/spu_task_sync.o [snip] Arnd ___ Linuxppc-dev mailing list [EMAIL PROTECTED] https://ozlabs.org/mailman/listinfo/linuxppc-dev - To unsubscribe from this list: send the line unsubscribe linux-kernel in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [Cbe-oss-dev] [RFC, PATCH] CELL Oprofile SPU profiling updated patch
Arnd Bergmann wrote: On Friday 16 February 2007 01:32, Maynard Johnson wrote: config OPROFILE_CELL bool "OProfile for Cell Broadband Engine" depends on OPROFILE && SPU_FS default y if ((SPU_FS = y && OPROFILE = y) || (SPU_FS = m && OPROFILE = m)) help Profiling of Cell BE SPUs requires special support enabled by this option. Both SPU_FS and OPROFILE options must be set 'y' or both be set 'm'. = Can anyone see a problem with any of this . . . or perhaps a suggestion of a better way? The text suggests it doesn't allow SPU_FS=y with OPROFILE=m, which I think should be allowed. Right, good catch. I'll add another OR to the 'default y' and correct the text. > I also don't see any place in the code where you actually use CONFIG_OPROFILE_CELL. As I mentioned, I will use CONFIG_OPROFILE_CELL in the arch/powerpc/oprofile/Makefile as follows: oprofile-$(CONFIG_OPROFILE_CELL) += op_model_cell.o \ cell/spu_profiler.o cell/vma_map.o cell/spu_task_sync.o Ideally, you should be able to have an oprofile_spu module that can be loaded after spufs.ko and oprofile.ko. In that case you only need config OPROFILE_SPU depends on OPROFILE && SPU_FS default y and it will automatically build oprofile_spu as a module if one of the two is a module and won't build it if one of them is disabled. Hmmm . . . I guess that would entail splitting out the SPU-related stuff from op_model_cell.c into a new file. Maybe more -- that's just what comes to mind right now. Could be very tricky, and I wonder if it's worth the bother. Arnd <>< - To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [Cbe-oss-dev] [RFC, PATCH] CELL Oprofile SPU profiling updated patch
Arnd Bergmann wrote: On Friday 16 February 2007 01:32, Maynard Johnson wrote: config OPROFILE_CELL bool OProfile for Cell Broadband Engine depends on OPROFILE SPU_FS default y if ((SPU_FS = y OPROFILE = y) || (SPU_FS = m OPROFILE = m)) help Profiling of Cell BE SPUs requires special support enabled by this option. Both SPU_FS and OPROFILE options must be set 'y' or both be set 'm'. = Can anyone see a problem with any of this . . . or perhaps a suggestion of a better way? The text suggests it doesn't allow SPU_FS=y with OPROFILE=m, which I think should be allowed. Right, good catch. I'll add another OR to the 'default y' and correct the text. I also don't see any place in the code where you actually use CONFIG_OPROFILE_CELL. As I mentioned, I will use CONFIG_OPROFILE_CELL in the arch/powerpc/oprofile/Makefile as follows: oprofile-$(CONFIG_OPROFILE_CELL) += op_model_cell.o \ cell/spu_profiler.o cell/vma_map.o cell/spu_task_sync.o Ideally, you should be able to have an oprofile_spu module that can be loaded after spufs.ko and oprofile.ko. In that case you only need config OPROFILE_SPU depends on OPROFILE SPU_FS default y and it will automatically build oprofile_spu as a module if one of the two is a module and won't build it if one of them is disabled. Hmmm . . . I guess that would entail splitting out the SPU-related stuff from op_model_cell.c into a new file. Maybe more -- that's just what comes to mind right now. Could be very tricky, and I wonder if it's worth the bother. Arnd - To unsubscribe from this list: send the line unsubscribe linux-kernel in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [Cbe-oss-dev] [RFC, PATCH] CELL Oprofile SPU profiling updated patch
Arnd Bergmann wrote: On Thursday 15 February 2007 00:52, Carl Love wrote: --- linux-2.6.20-rc1.orig/arch/powerpc/oprofile/Kconfig 2007-01-18 16:43:14.0 -0600 +++ linux-2.6.20-rc1/arch/powerpc/oprofile/Kconfig 2007-02-13 19:04:46.271028904 -0600 @@ -7,7 +7,8 @@ config OPROFILE tristate "OProfile system profiling (EXPERIMENTAL)" - depends on PROFILING + default m + depends on SPU_FS && PROFILING help OProfile is a profiling system capable of profiling the whole system, include the kernel, kernel modules, libraries, Milton already commented on this being wrong. I think what you want is depends on PROFILING && (SPU_FS = n || SPU_FS) that should make sure that when SPU_FS=y that OPROFILE can not be 'm'. The above suggestion would not work if SPU_FS is not defined, since the entire config option is ignored if an undefined symbol is used. So, here's what I propose instead: - Leave the existing 'config OPROFILE' unchanged from its current form in mainline (shown below) - Add the new 'config OPROFILE_CELL' (shown below) - In arch/powerpc/configs/cell-defconfig, set CONFIG_OPROFILE=m, to correspond to setting for CONFIG_SPU_FS - In arch/powerpc/oprofile/Makefile, do the following: oprofile-$(CONFIG_OPROFILE_CELL) += op_model_cell.o \ cell/spu_profiler.o cell/vma_map.o cell/spu_task_sync.o === config OPROFILE tristate "OProfile system profiling (EXPERIMENTAL)" depends on PROFILING help OProfile is a profiling system capable of profiling the whole system, include the kernel, kernel modules, libraries, and applications. If unsure, say N. config OPROFILE_CELL bool "OProfile for Cell Broadband Engine" depends on OPROFILE && SPU_FS default y if ((SPU_FS = y && OPROFILE = y) || (SPU_FS = m && OPROFILE = m)) help Profiling of Cell BE SPUs requires special support enabled by this option. Both SPU_FS and OPROFILE options must be set 'y' or both be set 'm'. = Can anyone see a problem with any of this . . . or perhaps a suggestion of a better way? Thanks. 
-Maynard - To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [Cbe-oss-dev] [RFC, PATCH] CELL Oprofile SPU profiling updated patch
Arnd Bergmann wrote: On Thursday 15 February 2007 00:52, Carl Love wrote: --- linux-2.6.20-rc1.orig/arch/powerpc/oprofile/Kconfig 2007-01-18 16:43:14.0 -0600 +++ linux-2.6.20-rc1/arch/powerpc/oprofile/Kconfig 2007-02-13 19:04:46.271028904 -0600 @@ -7,7 +7,8 @@ config OPROFILE tristate "OProfile system profiling (EXPERIMENTAL)" - depends on PROFILING + default m + depends on SPU_FS && PROFILING help OProfile is a profiling system capable of profiling the whole system, include the kernel, kernel modules, libraries, Milton already commented on this being wrong. I think what you want is depends on PROFILING && (SPU_FS = n || SPU_FS) that should make sure that when SPU_FS=y that OPROFILE can not be 'm'. Blast it! I did this right on our development system, but neglected to update the patch correctly to remove this dependency and 'default m'. I'll fix in the next patch. @@ -15,3 +16,10 @@ If unsure, say N. +config OPROFILE_CELL + bool "OProfile for Cell Broadband Engine" + depends on SPU_FS && OPROFILE + default y + help + OProfile for Cell BE requires special support enabled + by this option. You should at least mention that this allows profiling the spus. OK. +#define EFWCALL ENOSYS /* Use an existing error number that is as +* close as possible for a FW call that failed. +* The probability of the call failing is +* very low. Passing up the error number +* ensures that the user will see an error +* message saying OProfile did not start. +* Dmesg will contain an accurate message +* about the failure. +*/ ENOSYS looks wrong though. It would appear to the user as if the oprofile function in the kernel was not present. I'd suggest EIO, and not use an extra define for that. Carl will reply to this. 
static int rtas_ibm_cbe_perftools(int subfunc, int passthru, void *address, unsigned long length) { u64 paddr = __pa(address); - return rtas_call(pm_rtas_token, 5, 1, NULL, subfunc, passthru, -paddr >> 32, paddr & 0x, length); + pm_rtas_token = rtas_token("ibm,cbe-perftools"); + + if (unlikely(pm_rtas_token == RTAS_UNKNOWN_SERVICE)) { + printk(KERN_ERR + "%s: rtas token ibm,cbe-perftools unknown\n", + __FUNCTION__); + return -EFWCALL; + } else { + + return rtas_call(pm_rtas_token, 5, 1, NULL, subfunc, + passthru, paddr >> 32, paddr & 0x, length); + } } Are you now reading the rtas token every time you call rtas? that seems like a waste of time. Carl will reply. +#define size 24 +#define ENTRIES (0x1<<8) /* 256 */ +#define MAXLFSR 0xFF + +int initial_lfsr[] = +{16777215, 3797240, 13519805, 11602690, 6497030, 7614675, 2328937, 2889445, + 12364575, 8723156, 2450594, 16280864, 14742496, 10904589, 6434212, 4996256, + 5814270, 13014041, 9825245, 410260, 904096, 15151047, 15487695, 3061843, + 16482682, 7938572, 4893279, 9390321, 4320879, 5686402, 1711063, 10176714, + 4512270, 1057359, 16700434, 5731602, 2070114, 16030890, 1208230, 15603106, + 11857845, 6470172, 1362790, 7316876, 8534496, 1629197, 10003072, 1714539, + 1814669, 7106700, 5427154, 3395151, 3683327, 12950450, 16620273, 12122372, + 7194999, 9952750, 3608260, 13604295, 2266835, 14943567, 7079230, 777380, + 4516801, 1737661, 8730333, 13796927, 3247181, 9950017, 3481896, 16527555, + 13116123, 14505033, 9781119, 4860212, 7403253, 13264219, 12269980, 100120, + 664506, 607795, 8274553, 13133688, 6215305, 13208866, 16439693, 3320753, + 8773582, 13874619, 1784784, 4513501, 11002978, 9318515, 3038856, 14254582, + 15484958, 15967857, 13504461, 13657322, 14724513, 13955736, 5695315, 7330509, + 12630101, 6826854, 439712, 4609055, 13288878, 1309632, 4996398, 11392266, + 793740, 7653789, 2472670, 14641200, 5164364, 5482529, 10415855, 1629108, + 2012376, 13661123, 14655718, 9534083, 16637925, 2537745, 9787923, 12750103, + 
4660370, 3283461, 14862772, 7034955, 6679872, 8918232, 6506913, 103649, + 6085577, 13324033, 14251613, 11058220, 11998181, 3100233, 468898, 7104918, + 12498413, 14408165, 1208514, 15712321, 3088687, 14778333, 3632503, 11151952, + 98896, 9159367, 8866146, 4780737, 4925758, 12362320, 4122783, 8543358, + 7056879, 10876914, 6282881, 1686625, 5100373, 4573666, 9265515, 13593840, + 5853060, 110, 4237111, 1576, 14344137, 4608332, 6590210, 13745050, + 10916568, 12340402, 7145275, 4417153, 2300360, 12079643, 7608534, 15238251, + 4947424, 7014722, 3984546, 7168073, 10759589, 16293080, 3757181, 4577717, + 5163790,
Re: [Cbe-oss-dev] [RFC, PATCH] CELL Oprofile SPU profiling updated patch
Arnd Bergmann wrote: On Thursday 15 February 2007 00:52, Carl Love wrote: --- linux-2.6.20-rc1.orig/arch/powerpc/oprofile/Kconfig 2007-01-18 16:43:14.0 -0600 +++ linux-2.6.20-rc1/arch/powerpc/oprofile/Kconfig 2007-02-13 19:04:46.271028904 -0600 @@ -7,7 +7,8 @@ config OPROFILE tristate OProfile system profiling (EXPERIMENTAL) - depends on PROFILING + default m + depends on SPU_FS PROFILING help OProfile is a profiling system capable of profiling the whole system, include the kernel, kernel modules, libraries, Milton already commented on this being wrong. I think what you want is depends on PROFILING (SPU_FS = n || SPU_FS) that should make sure that when SPU_FS=y that OPROFILE can not be 'm'. Blast it! I did this right on our development system, but neglected to update the patch correctly to remove this dependency and 'default m'. I'll fix in the next patch. @@ -15,3 +16,10 @@ If unsure, say N. +config OPROFILE_CELL + bool OProfile for Cell Broadband Engine + depends on SPU_FS OPROFILE + default y + help + OProfile for Cell BE requires special support enabled + by this option. You should at least mention that this allows profiling the spus. OK. +#define EFWCALL ENOSYS /* Use an existing error number that is as +* close as possible for a FW call that failed. +* The probability of the call failing is +* very low. Passing up the error number +* ensures that the user will see an error +* message saying OProfile did not start. +* Dmesg will contain an accurate message +* about the failure. +*/ ENOSYS looks wrong though. It would appear to the user as if the oprofile function in the kernel was not present. I'd suggest EIO, and not use an extra define for that. Carl will reply to this. 
static int rtas_ibm_cbe_perftools(int subfunc, int passthru, void *address, unsigned long length) { u64 paddr = __pa(address); - return rtas_call(pm_rtas_token, 5, 1, NULL, subfunc, passthru, -paddr >> 32, paddr & 0xffffffff, length); + pm_rtas_token = rtas_token("ibm,cbe-perftools"); + + if (unlikely(pm_rtas_token == RTAS_UNKNOWN_SERVICE)) { + printk(KERN_ERR + "%s: rtas token ibm,cbe-perftools unknown\n", + __FUNCTION__); + return -EFWCALL; + } else { + + return rtas_call(pm_rtas_token, 5, 1, NULL, subfunc, + passthru, paddr >> 32, paddr & 0xffffffff, length); + } } Are you now reading the rtas token every time you call rtas? that seems like a waste of time. Carl will reply. +#define size 24 +#define ENTRIES (0x1 << 8) /* 256 */ +#define MAXLFSR 0xFFFFFF + +int initial_lfsr[] = +{16777215, 3797240, 13519805, 11602690, 6497030, 7614675, 2328937, 2889445, + 12364575, 8723156, 2450594, 16280864, 14742496, 10904589, 6434212, 4996256, + 5814270, 13014041, 9825245, 410260, 904096, 15151047, 15487695, 3061843, + 16482682, 7938572, 4893279, 9390321, 4320879, 5686402, 1711063, 10176714, + 4512270, 1057359, 16700434, 5731602, 2070114, 16030890, 1208230, 15603106, + 11857845, 6470172, 1362790, 7316876, 8534496, 1629197, 10003072, 1714539, + 1814669, 7106700, 5427154, 3395151, 3683327, 12950450, 16620273, 12122372, + 7194999, 9952750, 3608260, 13604295, 2266835, 14943567, 7079230, 777380, + 4516801, 1737661, 8730333, 13796927, 3247181, 9950017, 3481896, 16527555, + 13116123, 14505033, 9781119, 4860212, 7403253, 13264219, 12269980, 100120, + 664506, 607795, 8274553, 13133688, 6215305, 13208866, 16439693, 3320753, + 8773582, 13874619, 1784784, 4513501, 11002978, 9318515, 3038856, 14254582, + 15484958, 15967857, 13504461, 13657322, 14724513, 13955736, 5695315, 7330509, + 12630101, 6826854, 439712, 4609055, 13288878, 1309632, 4996398, 11392266, + 793740, 7653789, 2472670, 14641200, 5164364, 5482529, 10415855, 1629108, + 2012376, 13661123, 14655718, 9534083, 16637925, 2537745, 9787923, 12750103, + 4660370, 3283461, 
14862772, 7034955, 6679872, 8918232, 6506913, 103649, + 6085577, 13324033, 14251613, 11058220, 11998181, 3100233, 468898, 7104918, + 12498413, 14408165, 1208514, 15712321, 3088687, 14778333, 3632503, 11151952, + 98896, 9159367, 8866146, 4780737, 4925758, 12362320, 4122783, 8543358, + 7056879, 10876914, 6282881, 1686625, 5100373, 4573666, 9265515, 13593840, + 5853060, 110, 4237111, 1576, 14344137, 4608332, 6590210, 13745050, + 10916568, 12340402, 7145275, 4417153, 2300360, 12079643, 7608534, 15238251, + 4947424, 7014722, 3984546, 7168073, 10759589, 16293080, 3757181, 4577717, + 5163790, 2488841, 4650617, 3650022,
Re: [Cbe-oss-dev] [RFC, PATCH] CELL Oprofile SPU profiling updated patch
Arnd Bergmann wrote: On Thursday 15 February 2007 00:52, Carl Love wrote: --- linux-2.6.20-rc1.orig/arch/powerpc/oprofile/Kconfig 2007-01-18 16:43:14.0 -0600 +++ linux-2.6.20-rc1/arch/powerpc/oprofile/Kconfig 2007-02-13 19:04:46.271028904 -0600 @@ -7,7 +7,8 @@ config OPROFILE tristate "OProfile system profiling (EXPERIMENTAL)" - depends on PROFILING + default m + depends on SPU_FS && PROFILING help OProfile is a profiling system capable of profiling the whole system, include the kernel, kernel modules, libraries, Milton already commented on this being wrong. I think what you want is depends on PROFILING && (SPU_FS = n || SPU_FS) that should make sure that when SPU_FS=y that OPROFILE can not be 'm'. The above suggestion would not work if SPU_FS is not defined, since the entire config option is ignored if an undefined symbol is used. So, here's what I propose instead: - Leave the existing 'config OPROFILE' unchanged from its current form in mainline (shown below) - Add the new 'config OPROFILE_CELL' (shown below) - In arch/powerpc/configs/cell-defconfig, set CONFIG_OPROFILE=m, to correspond to setting for CONFIG_SPU_FS - In arch/powerpc/oprofile/Makefile, do the following: oprofile-$(CONFIG_OPROFILE_CELL) += op_model_cell.o \ cell/spu_profiler.o cell/vma_map.o cell/spu_task_sync.o === config OPROFILE tristate "OProfile system profiling (EXPERIMENTAL)" depends on PROFILING help OProfile is a profiling system capable of profiling the whole system, include the kernel, kernel modules, libraries, and applications. If unsure, say N. config OPROFILE_CELL bool "OProfile for Cell Broadband Engine" depends on OPROFILE && SPU_FS default y if ((SPU_FS = y && OPROFILE = y) || (SPU_FS = m && OPROFILE = m)) help Profiling of Cell BE SPUs requires special support enabled by this option. Both SPU_FS and OPROFILE options must be set 'y' or both be set 'm'. = Can anyone see a problem with any of this . . . or perhaps a suggestion of a better way? Thanks. 
-Maynard - To unsubscribe from this list: send the line unsubscribe linux-kernel in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [Cbe-oss-dev] [RFC, PATCH] CELL Oprofile SPU profiling updated patch
Milton, Thank you for your comments. Carl will reply to certain parts of your posting where he's more knowledgeable than I. See my replies below. -Maynard Milton Miller wrote: On Feb 6, 2007, at 5:02 PM, Carl Love wrote: This is the first update to the patch previously posted by Maynard Johnson as "PATCH 4/4. Add support to OProfile for profiling CELL". [snip] Data collected The current patch starts tackling these translation issues for the presently common case of a static self contained binary from a single file, either single separate source file or embedded in the data of the host application. When creating the trace entry for a SPU context switch, it records the application owner, pid, tid, and dcookie of the main executable. It addition, it looks up the object-id as a virtual address and records the offset if it is non-zero, or the dcookie of the object if it is zero. The code then creates a data structure by reading the elf headers from the user process (at the address given by the object-id) and building a list of SPU address to elf object offsets, as specified by the ELF loader headers. In addition to the elf loader section, it processes the overlay headers and records the address, size, and magic number of the overlay. When the hardware trace entries are processed, each address is looked up this structure and translated to the elf offset. If it is an overlay region, the overlay identify word is read and the list is searched for the matching overlay. The resulting offset is sent to the oprofile system. The current patch specifically identifies that only single elf objects are handled. There is no code to handle dynamic linked libraries or overlays. Nor is there any method to Yes, we do handle overlays. (Note: I'm looking into a bug right now in our overlay support.) present samples that may have been collected during context switch processing, they must be discarded. My proposal is to change what is presented to user space. 
Instead of trying to translate the SPU address to the backing file as the samples are recorded, store the samples as the SPU context and address. The context switch would record tid, pid, object id as it does now. In addition, if this is a new object-id, the kernel would read elf headers as it does today. However, it would then proceed to provide accurate dcookie information for each loader region and overlay. To identify which overlays are active, (instead of the present read on use and search the list to translate approach) the kernel would record the location of the overlay identifiers as it parsed the kernel, but would then read the identification word and would record the present value as an sample from a separate but related stream. The kernel could maintain the last value for each overlay and only send profile events for the deltas. Discussions on this topic in the past have resulted in the current implementation precisely because we're able to record the samples as fileoffsets, just as the userspace tools expect. I haven't had time to check out how much this would impact the userspace tools, but my gut feel is that it would be quite significant. If we were developing this module with a matching newly-created userspace tool, I would be more inclined to agree that this makes sense. But you give no rationale for your proposal that justifies the change. The current implementation works, it has no impact on normal, non-profiling behavior, and the overhead during profiling is not noticeable. This approach trades translation lookup overhead for each recorded sample for a burst of data on new context activation. In addition it exposes the sample point of the overlay identifier vs the address collection. This allows the ambiguity to be exposed to user space. In addition, with the above proposed kernel timer vs sample collection, user space could limit the elapsed time between the address collection and the overlay id check. 
Yes, there is a window here where an overlay could occur before we finish processing a group of samples that were actually taken from a different overlay. The obvious way to prevent that is for the kernel (or SPUFS) to be notified of the overlay and let OProfile know that we need to drain (perhaps discard would be best) our sample trace buffer. As you indicate above, your proposal faces the same issue, but would just decrease the number of bogus samples. I contend that the relative number of bogus samples will be quite low in either case. Ideally, we should have a mechanism to eliminate them completely so as to avoid confusion the user's part when they're looking at a report. Even a few bogus samples in the wrong place can be troubling. Such a mechanism will be a good future enhancement. [snip] milton -- [EMAIL PROTECTED] Milton Miller Speaking for myself only. ___ Linuxppc-dev mailing l
Re: [Cbe-oss-dev] [RFC, PATCH] CELL Oprofile SPU profiling updated patch
Michael, Thanks very much for the advice. Both issues have been solved now, with your help. -Maynard Michael Ellerman wrote: On Wed, 2007-02-07 at 09:41 -0600, Maynard Johnson wrote: Carl Love wrote: Subject: Add support to OProfile for profiling Cell BE SPUs From: Maynard Johnson <[EMAIL PROTECTED]> This patch updates the existing arch/powerpc/oprofile/op_model_cell.c to add in the SPU profiling capabilities. In addition, a 'cell' subdirectory was added to arch/powerpc/oprofile to hold Cell-specific SPU profiling code. Signed-off-by: Carl Love <[EMAIL PROTECTED]> Signed-off-by: Maynard Johnson <[EMAIL PROTECTED]> Index: linux-2.6.20-rc1/arch/powerpc/configs/cell_defconfig I've discovered more problems with the kref handling for the cached_info object that we store in the spu_context. :-( When the OProfile module initially determines that no cached_info yet exists for a given spu_context, it creates the cached_info, inits the cached_info's kref (which increments the refcount) and does a kref_get (for SPUFS' ref) before passing the cached_info reference off to SUPFS to store into the spu_context. When OProfile shuts down or the SPU job ends, OProfile gives up its ref to the cached_info with kref_put. Then when SPUFS destroys the spu_context, it also gives up its ref. HOWEVER . . . . If OProfile shuts down while the SPU job is still active _and_ _then_ is restarted while the job is still active, OProfile will find that the cached_info exists for the given spu_context, so it won't go through the process of creating it and doing kref_init on the kref. Under this scenario, OProfile does not own a ref of its own to the cached_info, and should not be doing a kref_put when done using the cached_info -- but it does, and so does SPUFS when the spu_context is destroyed. The end result (with the code as currently written) is that an extra kref_put is done when the refcount is already down to zero. 
To fix this, OProfile needs to detect when it finds an existing cached_info already stored in the spu_context. Then, instead of creating a new one, it sets a reminder flag to be used later when it's done using the cached info to indicate whether or not it needs to call kref_put. I think all you want to do is when oprofile finds the cached_info already existing, it does a kref_get(). After all it doesn't have a reference to it, so before it starts using it it must inc the ref count. Unfortunately, there's another problem (one that should have been obvious to me). The cached_info's kref "release" function is destroy_cached_info(), defined in the OProfile module. If the OProfile module is unloaded when SPUFS destroys the spu_context and calls kref_put on the cached_info's kref -- KABOOM! The destroy_cached_info function (the second arg to kref_put) is not in memory, so we get a paging fault. I see a couple options to solve this: 1) Don't store the cached_info in the spu_context. Basically, go back to the simplistic model of creating/deleting the cached_info on every SPU task activation/deactivation. 2) If there's some way to do this, force the OProfile module to stay loaded until SPUFS has destroyed its last spu_context that holds a cached_info object. There is a mechanism for that, you just have each cached_info inc the module's refcount. Another option would be to have a mapping, in the oprofile code, from spu_contexts to cached_infos, ie. a hash table or something. cheers - To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [Cbe-oss-dev] [RFC, PATCH] CELL Oprofile SPU profiling updated patch
Michael, Thanks very much for the advice. Both issues have been solved now, with your help. -Maynard Michael Ellerman wrote: On Wed, 2007-02-07 at 09:41 -0600, Maynard Johnson wrote: Carl Love wrote: Subject: Add support to OProfile for profiling Cell BE SPUs From: Maynard Johnson [EMAIL PROTECTED] This patch updates the existing arch/powerpc/oprofile/op_model_cell.c to add in the SPU profiling capabilities. In addition, a 'cell' subdirectory was added to arch/powerpc/oprofile to hold Cell-specific SPU profiling code. Signed-off-by: Carl Love [EMAIL PROTECTED] Signed-off-by: Maynard Johnson [EMAIL PROTECTED] Index: linux-2.6.20-rc1/arch/powerpc/configs/cell_defconfig I've discovered more problems with the kref handling for the cached_info object that we store in the spu_context. :-( When the OProfile module initially determines that no cached_info yet exists for a given spu_context, it creates the cached_info, inits the cached_info's kref (which increments the refcount) and does a kref_get (for SPUFS' ref) before passing the cached_info reference off to SUPFS to store into the spu_context. When OProfile shuts down or the SPU job ends, OProfile gives up its ref to the cached_info with kref_put. Then when SPUFS destroys the spu_context, it also gives up its ref. HOWEVER . . . . If OProfile shuts down while the SPU job is still active _and_ _then_ is restarted while the job is still active, OProfile will find that the cached_info exists for the given spu_context, so it won't go through the process of creating it and doing kref_init on the kref. Under this scenario, OProfile does not own a ref of its own to the cached_info, and should not be doing a kref_put when done using the cached_info -- but it does, and so does SPUFS when the spu_context is destroyed. The end result (with the code as currently written) is that an extra kref_put is done when the refcount is already down to zero. 
To fix this, OProfile needs to detect when it finds an existing cached_info already stored in the spu_context. Then, instead of creating a new one, it sets a reminder flag to be used later when it's done using the cached info to indicate whether or not it needs to call kref_put. I think all you want to do is when oprofile finds the cached_info already existing, it does a kref_get(). After all it doesn't have a reference to it, so before it starts using it it must inc the ref count. Unfortunately, there's another problem (one that should have been obvious to me). The cached_info's kref release function is destroy_cached_info(), defined in the OProfile module. If the OProfile module is unloaded when SPUFS destroys the spu_context and calls kref_put on the cached_info's kref -- KABOOM! The destroy_cached_info function (the second arg to kref_put) is not in memory, so we get a paging fault. I see a couple options to solve this: 1) Don't store the cached_info in the spu_context. Basically, go back to the simplistic model of creating/deleting the cached_info on every SPU task activation/deactivation. 2) If there's some way to do this, force the OProfile module to stay loaded until SPUFS has destroyed its last spu_context that holds a cached_info object. There is a mechanism for that, you just have each cached_info inc the module's refcount. Another option would be to have a mapping, in the oprofile code, from spu_contexts to cached_infos, ie. a hash table or something. cheers - To unsubscribe from this list: send the line unsubscribe linux-kernel in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [Cbe-oss-dev] [RFC, PATCH] CELL Oprofile SPU profiling updated patch
Milton, Thank you for your comments. Carl will reply to certain parts of your posting where he's more knowledgeable than I. See my replies below. -Maynard Milton Miller wrote: On Feb 6, 2007, at 5:02 PM, Carl Love wrote: This is the first update to the patch previously posted by Maynard Johnson as PATCH 4/4. Add support to OProfile for profiling CELL. [snip] Data collected The current patch starts tackling these translation issues for the presently common case of a static self contained binary from a single file, either single separate source file or embedded in the data of the host application. When creating the trace entry for a SPU context switch, it records the application owner, pid, tid, and dcookie of the main executable. It addition, it looks up the object-id as a virtual address and records the offset if it is non-zero, or the dcookie of the object if it is zero. The code then creates a data structure by reading the elf headers from the user process (at the address given by the object-id) and building a list of SPU address to elf object offsets, as specified by the ELF loader headers. In addition to the elf loader section, it processes the overlay headers and records the address, size, and magic number of the overlay. When the hardware trace entries are processed, each address is looked up this structure and translated to the elf offset. If it is an overlay region, the overlay identify word is read and the list is searched for the matching overlay. The resulting offset is sent to the oprofile system. The current patch specifically identifies that only single elf objects are handled. There is no code to handle dynamic linked libraries or overlays. Nor is there any method to Yes, we do handle overlays. (Note: I'm looking into a bug right now in our overlay support.) present samples that may have been collected during context switch processing, they must be discarded. My proposal is to change what is presented to user space. 
Instead of trying to translate the SPU address to the backing file as the samples are recorded, store the samples as the SPU context and address. The context switch would record tid, pid, object id as it does now. In addition, if this is a new object-id, the kernel would read elf headers as it does today. However, it would then proceed to provide accurate dcookie information for each loader region and overlay. To identify which overlays are active, (instead of the present read on use and search the list to translate approach) the kernel would record the location of the overlay identifiers as it parsed the kernel, but would then read the identification word and would record the present value as an sample from a separate but related stream. The kernel could maintain the last value for each overlay and only send profile events for the deltas. Discussions on this topic in the past have resulted in the current implementation precisely because we're able to record the samples as fileoffsets, just as the userspace tools expect. I haven't had time to check out how much this would impact the userspace tools, but my gut feel is that it would be quite significant. If we were developing this module with a matching newly-created userspace tool, I would be more inclined to agree that this makes sense. But you give no rationale for your proposal that justifies the change. The current implementation works, it has no impact on normal, non-profiling behavior, and the overhead during profiling is not noticeable. This approach trades translation lookup overhead for each recorded sample for a burst of data on new context activation. In addition it exposes the sample point of the overlay identifier vs the address collection. This allows the ambiguity to be exposed to user space. In addition, with the above proposed kernel timer vs sample collection, user space could limit the elapsed time between the address collection and the overlay id check. 
Yes, there is a window here where an overlay could occur before we finish processing a group of samples that were actually taken from a different overlay. The obvious way to prevent that is for the kernel (or SPUFS) to be notified of the overlay and let OProfile know that we need to drain (perhaps discard would be best) our sample trace buffer. As you indicate above, your proposal faces the same issue, but would just decrease the number of bogus samples. I contend that the relative number of bogus samples will be quite low in either case. Ideally, we should have a mechanism to eliminate them completely so as to avoid confusion the user's part when they're looking at a report. Even a few bogus samples in the wrong place can be troubling. Such a mechanism will be a good future enhancement. [snip] milton -- [EMAIL PROTECTED] Milton Miller Speaking for myself only. ___ Linuxppc-dev mailing list [EMAIL
Re: [Cbe-oss-dev] [RFC, PATCH] CELL Oprofile SPU profiling updated patch
Carl Love wrote: Subject: Add support to OProfile for profiling Cell BE SPUs From: Maynard Johnson <[EMAIL PROTECTED]> This patch updates the existing arch/powerpc/oprofile/op_model_cell.c to add in the SPU profiling capabilities. In addition, a 'cell' subdirectory was added to arch/powerpc/oprofile to hold Cell-specific SPU profiling code. Signed-off-by: Carl Love <[EMAIL PROTECTED]> Signed-off-by: Maynard Johnson <[EMAIL PROTECTED]> Index: linux-2.6.20-rc1/arch/powerpc/configs/cell_defconfig I've discovered more problems with the kref handling for the cached_info object that we store in the spu_context. :-( When the OProfile module initially determines that no cached_info yet exists for a given spu_context, it creates the cached_info, inits the cached_info's kref (which increments the refcount) and does a kref_get (for SPUFS' ref) before passing the cached_info reference off to SUPFS to store into the spu_context. When OProfile shuts down or the SPU job ends, OProfile gives up its ref to the cached_info with kref_put. Then when SPUFS destroys the spu_context, it also gives up its ref. HOWEVER . . . . If OProfile shuts down while the SPU job is still active _and_ _then_ is restarted while the job is still active, OProfile will find that the cached_info exists for the given spu_context, so it won't go through the process of creating it and doing kref_init on the kref. Under this scenario, OProfile does not own a ref of its own to the cached_info, and should not be doing a kref_put when done using the cached_info -- but it does, and so does SPUFS when the spu_context is destroyed. The end result (with the code as currently written) is that an extra kref_put is done when the refcount is already down to zero. To fix this, OProfile needs to detect when it finds an existing cached_info already stored in the spu_context. 
Then, instead of creating a new one, it sets a reminder flag to be used later when it's done using the cached info to indicate whether or not it needs to call kref_put. Unfortunately, there's another problem (one that should have been obvious to me). The cached_info's kref "release" function is destroy_cached_info(), defined in the OProfile module. If the OProfile module is unloaded when SPUFS destroys the spu_context and calls kref_put on the cached_info's kref -- KABOOM! The destroy_cached_info function (the second arg to kref_put) is not in memory, so we get a paging fault. I see a couple options to solve this: 1) Don't store the cached_info in the spu_context. Basically, go back to the simplistic model of creating/deleting the cached_info on every SPU task activation/deactivation. 2) If there's some way to do this, force the OProfile module to stay loaded until SPUFS has destroyed its last spu_context that holds a cached_info object. I thought about putting the cached_info's kref "release" function in SPUFS, but this just won't work. This implies that SPUFS needs to know about the structure of the cached_info, e.g., that it contains the vma_map member that needs to be freed. But even with that information, it's not enough, since the vma_map member consists of list of vma_maps, which is why we have the vma_map_free() function. So SPUFS would still need access to vma_map_free() from the OProfile module. Opinions from others would be appreciated. Thanks. -Maynard +/* Container for caching information about an active SPU task. 
+ * + */ +struct cached_info { + struct vma_to_fileoffset_map * map; + struct spu * the_spu; /* needed to access pointer to local_store */ + struct kref cache_ref; +}; + +static struct cached_info * spu_info[MAX_NUMNODES * 8]; + +static void destroy_cached_info(struct kref * kref) +{ + struct cached_info * info; + info = container_of(kref, struct cached_info, cache_ref); + vma_map_free(info->map); + kfree(info); +} + +/* Return the cached_info for the passed SPU number. + * + */ +static struct cached_info * get_cached_info(struct spu * the_spu, int spu_num) +{ + struct cached_info * ret_info = NULL; + unsigned long flags = 0; + if (spu_num >= num_spu_nodes) { + printk(KERN_ERR "SPU_PROF: " + "%s, line %d: Invalid index %d into spu info cache\n", + __FUNCTION__, __LINE__, spu_num); + goto out; + } + spin_lock_irqsave(_lock, flags); + if (!spu_info[spu_num] && the_spu) + spu_info[spu_num] = (struct cached_info *) + spu_get_profile_private(the_spu->ctx); + + ret_info = spu_info[spu_num]; + spin_unlock_irqrestore(_lock, flags); + out: + return ret_info; +} + + +/* Looks for cached info for the passed spu. If not found, the + * cached info is created for the passed spu. + * Returns 0 for success; otherwise, -1 for error. + */ +static int +prepare_cached_spu_info(
Re: [Cbe-oss-dev] [RFC, PATCH] CELL Oprofile SPU profiling updated patch
Carl Love wrote: Subject: Add support to OProfile for profiling Cell BE SPUs From: Maynard Johnson [EMAIL PROTECTED] This patch updates the existing arch/powerpc/oprofile/op_model_cell.c to add in the SPU profiling capabilities. In addition, a 'cell' subdirectory was added to arch/powerpc/oprofile to hold Cell-specific SPU profiling code. Signed-off-by: Carl Love [EMAIL PROTECTED] Signed-off-by: Maynard Johnson [EMAIL PROTECTED] Index: linux-2.6.20-rc1/arch/powerpc/configs/cell_defconfig I've discovered more problems with the kref handling for the cached_info object that we store in the spu_context. :-( When the OProfile module initially determines that no cached_info yet exists for a given spu_context, it creates the cached_info, inits the cached_info's kref (which increments the refcount) and does a kref_get (for SPUFS' ref) before passing the cached_info reference off to SUPFS to store into the spu_context. When OProfile shuts down or the SPU job ends, OProfile gives up its ref to the cached_info with kref_put. Then when SPUFS destroys the spu_context, it also gives up its ref. HOWEVER . . . . If OProfile shuts down while the SPU job is still active _and_ _then_ is restarted while the job is still active, OProfile will find that the cached_info exists for the given spu_context, so it won't go through the process of creating it and doing kref_init on the kref. Under this scenario, OProfile does not own a ref of its own to the cached_info, and should not be doing a kref_put when done using the cached_info -- but it does, and so does SPUFS when the spu_context is destroyed. The end result (with the code as currently written) is that an extra kref_put is done when the refcount is already down to zero. To fix this, OProfile needs to detect when it finds an existing cached_info already stored in the spu_context. 
Then, instead of creating a new one, it sets a reminder flag to be used later when it's done using the cached info to indicate whether or not it needs to call kref_put. Unfortunately, there's another problem (one that should have been obvious to me). The cached_info's kref release function is destroy_cached_info(), defined in the OProfile module. If the OProfile module is unloaded when SPUFS destroys the spu_context and calls kref_put on the cached_info's kref -- KABOOM! The destroy_cached_info function (the second arg to kref_put) is not in memory, so we get a paging fault. I see a couple options to solve this: 1) Don't store the cached_info in the spu_context. Basically, go back to the simplistic model of creating/deleting the cached_info on every SPU task activation/deactivation. 2) If there's some way to do this, force the OProfile module to stay loaded until SPUFS has destroyed its last spu_context that holds a cached_info object. I thought about putting the cached_info's kref release function in SPUFS, but this just won't work. This implies that SPUFS needs to know about the structure of the cached_info, e.g., that it contains the vma_map member that needs to be freed. But even with that information, it's not enough, since the vma_map member consists of list of vma_maps, which is why we have the vma_map_free() function. So SPUFS would still need access to vma_map_free() from the OProfile module. Opinions from others would be appreciated. Thanks. -Maynard +/* Container for caching information about an active SPU task. 
+ * + */ +struct cached_info { + struct vma_to_fileoffset_map * map; + struct spu * the_spu; /* needed to access pointer to local_store */ + struct kref cache_ref; +}; + +static struct cached_info * spu_info[MAX_NUMNODES * 8]; + +static void destroy_cached_info(struct kref * kref) +{ + struct cached_info * info; + info = container_of(kref, struct cached_info, cache_ref); + vma_map_free(info->map); + kfree(info); +} + +/* Return the cached_info for the passed SPU number. + * + */ +static struct cached_info * get_cached_info(struct spu * the_spu, int spu_num) +{ + struct cached_info * ret_info = NULL; + unsigned long flags = 0; + if (spu_num >= num_spu_nodes) { + printk(KERN_ERR "SPU_PROF: " + "%s, line %d: Invalid index %d into spu info cache\n", + __FUNCTION__, __LINE__, spu_num); + goto out; + } + spin_lock_irqsave(&cache_lock, flags); + if (!spu_info[spu_num] && the_spu) + spu_info[spu_num] = (struct cached_info *) + spu_get_profile_private(the_spu->ctx); + + ret_info = spu_info[spu_num]; + spin_unlock_irqrestore(&cache_lock, flags); + out: + return ret_info; +} + + +/* Looks for cached info for the passed spu. If not found, the + * cached info is created for the passed spu. + * Returns 0 for success; otherwise, -1 for error. + */ +static int +prepare_cached_spu_info(struct spu * spu, unsigned int objectId) +{ + unsigned long flags = 0
Re: [Cbe-oss-dev] [RFC, PATCH 4/4] Add support to OProfile for profiling Cell BE SPUs -- update
Arnd Bergmann wrote: On Sunday 04 February 2007 00:49, Maynard Johnson wrote: I seem to recall looking at this option a while back, but didn't go that route since struct spu_context is opaque to me. With such a teqnique, I could then use a simple 16-element array of pointers to cached_info objects, creating them as needed when spu_context->profile_private is NULL. I suppose the better option for now is to add a get_profile_private() function to SPUFs, rather than requiring spu_context to be visible. Yes, that sounds good. Note that the file providing the spufs_get_profile_private (and respective spufs_set_profile_private) functions needs to be compiled into the kernel then in case oprofile gets linked in but spufs is a module. Hmm . . . we already depend on the register/unregister functions in sched.c, so my patch changes the oprofile Kconfig to default to 'm' and 'depends on SPU_FS'. I think it would also be necessary to have another interface for cleaning up this data when spufs destroys the context. That could possibly a variation of the existing notifier call, or a new call, or you establish the convention that if the private pointer is non-NULL, spufs will kfree it. Yes, I was thnking along the lines of your last suggestion. I presume OProfile gets notified (object_id == 0) before the context is actually destroyed. At that time, we would NULL-out the reference to the cached_info, so then SPUFS would kfree it at destroy time. -Maynard Arnd <>< - To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [Cbe-oss-dev] [RFC, PATCH 4/4] Add support to OProfile for profiling Cell BE SPUs -- update
Arnd Bergmann wrote: On Saturday 03 February 2007 21:03, Maynard Johnson wrote: I presume you mean 'object_id'. Right, sorry for the confusion. What you're asking for is a new requirement, and one which I don't believe is achievable in the current timeframe. Since this is spufs code that's dynamically loaded into the SPU at runtime, the symbol information for this code is not accessible to the userspace post-processing tools. We can always fix the user space tool later, but it is important to get at least the kernel interface right, so we can add the functionality later without breaking new user space on old kernels or vice versa. There's no obvious solution to this problem, so it's going to take some design effort to come up with a solution. We can work on the problem, but I don't think we can allow it to get in the way of getting our currently proposed SPU profiling functionality into the kernel. It would require an altogether different mechanism to record samples along with necessary information, not to mention the changes required in the post-processing tools. This will have to be a future enhancement. So what do you do with the samples for object_id == 0? I would expect them to be simply added to the sample buffer for the kernel, which is sort of special in oprofile anyway. There is no sample buffer for the kernel in SPU profiling. When OProfile gets notified of an SPU task switching out (object_id == 0), we stop recording samples for the corresponding SPU. If SPUFs sends the notification after the spu_save operation, we still would be collecting samples during that time; however, since the VMAs of these samples would not map to any fileoffset in the SPU binary that's executing, our current implementation throws them away. We could change that behavior and record them in the samples buffer as "anonymous samples". 
Not sure if that would help too much, as you wouldn't be able to distinguish between spu_save samples and samples from generated stubs that are executing from the stack. -Maynard Arnd <>< - To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [Cbe-oss-dev] [RFC, PATCH 4/4] Add support to OProfile for profiling Cell BE SPUs -- update
Arnd Bergmann wrote: On Saturday 03 February 2007 21:03, Maynard Johnson wrote: I presume you mean 'object_id'. Right, sorry for the confusion. What you're asking for is a new requirement, and one which I don't believe is achievable in the current timeframe. Since this is spufs code that's dynamicaly loaded into the SPU at runtime, the symbol information for this code is not accessible to the userspace post-processing tools. We can always fix the user space tool later, but it is important to get at least the kernel interface right, so we can add the functionality later without breaking new user space on old kernels or vice versa. There's no obvious solution to this problem, so it's going to take some design effort to come up with a solution. We can work on the problem, but I don't think we can allow it to get in the way of getting our currently proposed SPU profiling functionality into the kernel. It would require an altogether different mechanism to record samples along with necessary information, not to mention the changes required in the post-processing tools. This will have to be a future enhancement. So what do you do with the samples for object_id == 0? I would expect them to be simply added to the sample buffer for the kernel, which is sort of special in oprofile anyway. There is no sample buffer for the kernel in SPU profiling. When OProfile gets notified of an SPU task switching out (object_if == 0), we stop recording samples for the corresponding SPU. If SPUFs sends the notification after the spu_save operation, we still would be collecting samples during that time; however, since the VMAs of these samples would not map to any fileoffset in the SPU binary that's executing, our current implementation throws them away. We could change that behavior and record them in the samples buffer as anonymous samples. 
Not sure if that would help too much, as you wouldn't be able to distinguish between spu_save samples and samples from generated stubs that are executing from the stack. -Maynard Arnd - To unsubscribe from this list: send the line unsubscribe linux-kernel in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [Cbe-oss-dev] [RFC, PATCH 4/4] Add support to OProfile for profiling Cell BE SPUs -- update
Arnd Bergmann wrote: On Sunday 04 February 2007 00:49, Maynard Johnson wrote: I seem to recall looking at this option a while back, but didn't go that route since struct spu_context is opaque to me. With such a teqnique, I could then use a simple 16-element array of pointers to cached_info objects, creating them as needed when spu_context-profile_private is NULL. I suppose the better option for now is to add a get_profile_private() function to SPUFs, rather than requiring spu_context to be visible. Yes, that sounds good. Note that the file providing the spufs_get_profile_private (and respective spufs_set_profile_private) functions needs to be compiled into the kernel then in case oprofile gets linked in but spufs is a module. Hmm . . . we already depend on the register/unregister functions in sched.c, so my patch changes the oprofile Kconfig to default to 'm' and 'depends on SPU_FS'. I think it would also be necessary to have another interface for cleaning up this data when spufs destroys the context. That could possibly a variation of the existing notifier call, or a new call, or you establish the convention that if the private pointer is non-NULL, spufs will kfree it. Yes, I was thnking along the lines of your last suggestion. I presume OProfile gets notified (object_id == 0) before the context is actually destroyed. At that time, we would NULL-out the reference to the cached_info, so then SPUFS would kfree it at destroy time. -Maynard Arnd - To unsubscribe from this list: send the line unsubscribe linux-kernel in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [Cbe-oss-dev] [RFC, PATCH 4/4] Add support to OProfile for profiling Cell BE SPUs -- update
Arnd Bergmann wrote: On Monday 29 January 2007 20:48, Maynard Johnson wrote: Subject: Add support to OProfile for profiling Cell BE SPUs [snip] + * + * Ideally, we would like to be able to create the cached_info for + * an SPU task just one time -- when libspe first loads the SPU + * binary file. We would store the cached_info in a list. Then, as + * SPU tasks are switched out and new ones switched in, the cached_info + * for inactive tasks would be kept, and the active one would be placed + * at the head of the list. But this technique may not with + * current spufs functionality since the spu used in bind_context may + * be a different spu than was used in a previous bind_context for a + * reactivated SPU task. Additionally, a reactivated SPU task may be + * assigned to run on a different physical SPE. We will investigate + * further if this can be done. + * + */ You should stuff a pointer to cached_info into struct spu_context, e.g. 'void *profile_private'. I seem to recall looking at this option a while back, but didn't go that route since struct spu_context is opaque to me. With such a teqnique, I could then use a simple 16-element array of pointers to cached_info objects, creating them as needed when spu_context->profile_private is NULL. I suppose the better option for now is to add a get_profile_private() function to SPUFs, rather than requiring spu_context to be visible. Don't know why I didn't think to do that before. Ah, well, live and learn. -Maynard +struct cached_info { + vma_map_t * map; + struct spu * the_spu; + struct kref cache_ref; + struct list_head list; +}; And replace the 'the_spu' member with a back pointer to the spu_context if you need it. Arnd <>< - To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [Cbe-oss-dev] [RFC, PATCH 4/4] Add support to OProfile for profiling Cell BE SPUs -- update
Arnd Bergmann wrote: On Friday 02 February 2007 17:47, Maynard Johnson wrote: We also want to be able to profile the context switch code itself, which means that we also need one event buffer associated with the kernel to collect events that for a zero context_id. The hardware design precludes tracing both SPU and PPU simultaneously. I mean the SPU-side part of the context switch code, which you can find in arch/powerpc/platforms/cell/spufs/spu_{save,restore}*. This code is the one that runs when context_id == 0 is passed to the callback. I presume you mean 'object_id'. What you're asking for is a new requirement, and one which I don't believe is achievable in the current timeframe. Since this is spufs code that's dynamicaly loaded into the SPU at runtime, the symbol information for this code is not accessible to the userspace post-processing tools. It would require an altogether different mechanism to record samples along with necessary information, not to mention the changes required in the post-processing tools. This will have to be a future enhancement. -Maynard Arnd <>< - To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [Cbe-oss-dev] [RFC, PATCH 4/4] Add support to OProfile for profiling Cell BE SPUs -- update
Arnd Bergmann wrote: On Friday 02 February 2007 17:47, Maynard Johnson wrote: We also want to be able to profile the context switch code itself, which means that we also need one event buffer associated with the kernel to collect events that for a zero context_id. The hardware design precludes tracing both SPU and PPU simultaneously. I mean the SPU-side part of the context switch code, which you can find in arch/powerpc/platforms/cell/spufs/spu_{save,restore}*. This code is the one that runs when context_id == 0 is passed to the callback. I presume you mean 'object_id'. What you're asking for is a new requirement, and one which I don't believe is achievable in the current timeframe. Since this is spufs code that's dynamicaly loaded into the SPU at runtime, the symbol information for this code is not accessible to the userspace post-processing tools. It would require an altogether different mechanism to record samples along with necessary information, not to mention the changes required in the post-processing tools. This will have to be a future enhancement. -Maynard Arnd - To unsubscribe from this list: send the line unsubscribe linux-kernel in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [Cbe-oss-dev] [RFC, PATCH 4/4] Add support to OProfile for profiling Cell BE SPUs -- update
Arnd Bergmann wrote: On Monday 29 January 2007 20:48, Maynard Johnson wrote: Subject: Add support to OProfile for profiling Cell BE SPUs [snip] + * + * Ideally, we would like to be able to create the cached_info for + * an SPU task just one time -- when libspe first loads the SPU + * binary file. We would store the cached_info in a list. Then, as + * SPU tasks are switched out and new ones switched in, the cached_info + * for inactive tasks would be kept, and the active one would be placed + * at the head of the list. But this technique may not with + * current spufs functionality since the spu used in bind_context may + * be a different spu than was used in a previous bind_context for a + * reactivated SPU task. Additionally, a reactivated SPU task may be + * assigned to run on a different physical SPE. We will investigate + * further if this can be done. + * + */ You should stuff a pointer to cached_info into struct spu_context, e.g. 'void *profile_private'. I seem to recall looking at this option a while back, but didn't go that route since struct spu_context is opaque to me. With such a teqnique, I could then use a simple 16-element array of pointers to cached_info objects, creating them as needed when spu_context-profile_private is NULL. I suppose the better option for now is to add a get_profile_private() function to SPUFs, rather than requiring spu_context to be visible. Don't know why I didn't think to do that before. Ah, well, live and learn. -Maynard +struct cached_info { + vma_map_t * map; + struct spu * the_spu; + struct kref cache_ref; + struct list_head list; +}; And replace the 'the_spu' member with a back pointer to the spu_context if you need it. Arnd - To unsubscribe from this list: send the line unsubscribe linux-kernel in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [Cbe-oss-dev] [RFC, PATCH 4/4] Add support to OProfile for profiling Cell BE SPUs -- update
Arnd Bergmann wrote: On Tuesday 30 January 2007 22:41, Maynard Johnson wrote: Arnd Bergmann wrote: + kt = ktime_set(0, profiling_interval); + if (!spu_prof_running) + goto STOP; + hrtimer_forward(timer, timer->base->get_time(), kt); + return HRTIMER_RESTART; is hrtimer_forward really the right interface here? You are ignoring the number of overruns anyway, so hrtimer_start(,,) sounds more correct to me. According to Tom Gleixner, "hrtimer_forward is a convenience function to move the expiry time of a timer forward in multiples of the interval, so it is in the future. After setting the expiry time you restart the timer either with [sic] a return HRTIMER_RESTART (if you are in the timer callback function)." Ok, I see. Have you seen the timer actually coming in late, resulting in hrtimer_forward returning non-zero? I guess it's not a big problem for statistic data collection if that happens, but you might still want to be able to see it. I don't think there's much point in knowing if we have overrun(s). We're not going to schedule the timer any differently. We want to keep the timer interrupts as consistent as possible according to the user's request. + /* Since cpufreq_quick_get returns frequency in kHz, we use +* USEC_PER_SEC here vs NSEC_PER_SEC. +*/ + unsigned long nsPerCyc = (USEC_PER_SEC << SCALE_SHIFT)/khzfreq; + profiling_interval = (nsPerCyc * cycles_reset) >> SCALE_SHIFT; + + pr_debug("timer resolution: %lu\n", +TICK_NSEC); Don't you need to adapt the profiling_interval at run time, when cpufreq changes the core frequency? You should probably use cpufreq_register_notifier() to update this. Since OProfile is a statistical profiler, the exact frequency is not critical. The user is going to be looking for hot spots in their code, so it's all relative. With that said, I don't imagine using the cpufreq notiication would be a big deal. We'll look at it. 
@@ -480,7 +491,22 @@ struct op_system_config *sys, int num_ctrs) { int i, j, cpu; + spu_cycle_reset = 0; + /* The cpufreq_quick_get function requires that cbe_cpufreq module +* be loaded. This function is not actually provided and exported +* by cbe_cpufreq, but it relies on cbe_cpufreq initialize kernel +* data structures. Since there's no way for depmod to realize +* that our OProfile module depends on cbe_cpufreq, we currently +* are letting the userspace tool, opcontrol, ensure that the +* cbe_cpufreq module is loaded. +*/ + khzfreq = cpufreq_quick_get(smp_processor_id()); You should probably have a fallback in here in case the cpufreq module is not loaded. There is a global variable ppc_proc_freq (in Hz) that you can access. Our userspace tool ensures the cpufreq module is loaded. You should not rely on user space tools to do the right thing in the kernel. Ok, we'll look at the fallback option you suggest. I don't recall if I even knew about ppc_proc_freq before or why I originally chose cpufreq_guick_get. Maybe we can do without the cpufreq and use ppc_proc_freq all the time. We'll see . . . Moreover, if the exact frequency is not that important, as you mentioned above, you can probably just hardcode a compile-time constant here. Well, exact frequency isn't critical, but it should, as close as possible, correspond with the user's requested value for "spu cycle reset". + * + * Ideally, we would like to be able to create the cached_info for + * an SPU task just one time -- when libspe first loads the SPU + * binary file. We would store the cached_info in a list. Then, as + * SPU tasks are switched out and new ones switched in, the cached_info + * for inactive tasks would be kept, and the active one would be placed + * at the head of the list. But this technique may not with + * current spufs functionality since the spu used in bind_context may + * be a different spu than was used in a previous bind_context for a + * reactivated SPU task. 
Additionally, a reactivated SPU task may be + * assigned to run on a different physical SPE. We will investigate + * further if this can be done. + * + */ You should stuff a pointer to cached_info into struct spu_context, e.g. 'void *profile_private'. +struct cached_info { + vma_map_t * map; + struct spu * the_spu; + struct kref cache_ref; + struct list_head list; +}; And replace the 'the_spu' member with a back pointer to the spu_context if you need it. + +/* A data structure for cached information about active SPU tasks. + * Storage is dynamically allocated, sized as + * "number of active nodes multplied by 8". + * The info_list[n] member holds 0 or more + * 'struct cached_info' objects for SPU#=n. + * + * As currently implemented, there will onl
Re: [Cbe-oss-dev] [RFC, PATCH 4/4] Add support to OProfile for profiling Cell BE SPUs -- update
Arnd Bergmann wrote: On Tuesday 30 January 2007 23:54, Maynard Johnson wrote: Why do you store them per spu in the first place? The physical spu doesn't have any relevance to this at all, the only data that is per spu is the sample data collected on a profiling interrupt, which you can then copy in the per-context data on a context switch. The sample data is written out to the event buffer on every profiling interrupt. But we don't write out the SPU program counter samples directly to the event buffer. First, we have to find the cached_info for the appropriate SPU context to retrieve the cached vma-to-fileoffset map. Then we do the vma_map_lookup to find the fileoffset corresponding to the SPU PC sample, which we then write out to the event buffer. This is one of the most time-critical pieces of the SPU profiling code, so I used an array to hold the cached_info for fast random access. But as I stated in a code comment above, the negative implication of this current implementation is that the array can only hold the cached_info for currently running SPU tasks. I need to give this some more thought. I've given this some more thought, and I'm coming to the conclusion that a pure array-based implementation for holding cached_info (getting rid of the lists) would work well for the vast majority of cases in which OProfile will be used. Yes, it is true that the mapping of an SPU context to a phsyical spu-numbered array location cannot be guaranteed to stay valid, and that's why I discard the cached_info at that array location when the SPU task is switched out. Yes, it would be terribly inefficient if the same SPU task gets switched back in later and we would have to recreate the cached_info. However, I contend that OProfile users are interested in profiling one application at a time. They are not going to want to muddy the waters with multiple SPU apps running at the same time. I can't think of any reason why someone would conscisouly choose to do that. 
Any thoughts from the general community, especially OProfile users? Please assume that in the near future we will be scheduling SPU contexts in and out multiple times a second. Even in a single application, you can easily have more contexts than you have physical SPUs. Arnd, thanks for pointing this out. That's definitely a good reason why my simplistic approach won't work. I'll look at other options. The event buffer by definition needs to be per context. If you for some Yes, and it is. Right now, with the current simplistic approach, the context and the physical SPU are kept in sync. reason want to collect the samples per physical SPU during an event interrupt, you should at least make sure that they are copied into the per-context event buffer on a context switch. At the context switch point, you probably also want to drain the hw event counters, so that you account all events correctly. Yeah, that's a good idea. The few extraneous invalid samples would probably never rise above the noise level, but we should do this anyway for completeness. We also want to be able to profile the context switch code itself, which means that we also need one event buffer associated with the kernel to collect events that for a zero context_id. The hardware design precludes tracing both SPU and PPU simultaneously. -Maynard Of course, the recording of raw samples in the per-context buffer does not need to have the dcookies along with it, you can still resolve the pointers when the SPU context gets destroyed (or an object gets unmapped). Arnd <>< - To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [Cbe-oss-dev] [RFC, PATCH 4/4] Add support to OProfile for profiling Cell BE SPUs -- update
Arnd Bergmann wrote: On Tuesday 30 January 2007 23:54, Maynard Johnson wrote: Why do you store them per spu in the first place? The physical spu doesn't have any relevance to this at all, the only data that is per spu is the sample data collected on a profiling interrupt, which you can then copy in the per-context data on a context switch. The sample data is written out to the event buffer on every profiling interrupt. But we don't write out the SPU program counter samples directly to the event buffer. First, we have to find the cached_info for the appropriate SPU context to retrieve the cached vma-to-fileoffset map. Then we do the vma_map_lookup to find the fileoffset corresponding to the SPU PC sample, which we then write out to the event buffer. This is one of the most time-critical pieces of the SPU profiling code, so I used an array to hold the cached_info for fast random access. But as I stated in a code comment above, the negative implication of this current implementation is that the array can only hold the cached_info for currently running SPU tasks. I need to give this some more thought. I've given this some more thought, and I'm coming to the conclusion that a pure array-based implementation for holding cached_info (getting rid of the lists) would work well for the vast majority of cases in which OProfile will be used. Yes, it is true that the mapping of an SPU context to a phsyical spu-numbered array location cannot be guaranteed to stay valid, and that's why I discard the cached_info at that array location when the SPU task is switched out. Yes, it would be terribly inefficient if the same SPU task gets switched back in later and we would have to recreate the cached_info. However, I contend that OProfile users are interested in profiling one application at a time. They are not going to want to muddy the waters with multiple SPU apps running at the same time. I can't think of any reason why someone would conscisouly choose to do that. 
Any thoughts from the general community, especially OProfile users? Please assume that in the near future we will be scheduling SPU contexts in and out multiple times a second. Even in a single application, you can easily have more contexts than you have physical SPUs. Arnd, thanks for pointing this out. That's definitely a good reason why my simplistic approach won't work. I'll look at other options. The event buffer by definition needs to be per context. If you for some Yes, and it is. Right now, with the current simplistic approach, the context and the physical SPU are kept in sync. reason want to collect the samples per physical SPU during an event interrupt, you should at least make sure that they are copied into the per-context event buffer on a context switch. At the context switch point, you probably also want to drain the hw event counters, so that you account all events correctly. Yeah, that's a good idea. The few extraneous invalid samples would probably never rise above the noise level, but we should do this anyway for completeness. We also want to be able to profile the context switch code itself, which means that we also need one event buffer associated with the kernel to collect events that for a zero context_id. The hardware design precludes tracing both SPU and PPU simultaneously. -Maynard Of course, the recording of raw samples in the per-context buffer does not need to have the dcookies along with it, you can still resolve the pointers when the SPU context gets destroyed (or an object gets unmapped). Arnd - To unsubscribe from this list: send the line unsubscribe linux-kernel in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [Cbe-oss-dev] [RFC, PATCH 4/4] Add support to OProfile for profiling Cell BE SPUs -- update
Arnd Bergmann wrote: On Tuesday 30 January 2007 22:41, Maynard Johnson wrote: Arnd Bergmann wrote: + kt = ktime_set(0, profiling_interval); + if (!spu_prof_running) + goto STOP; + hrtimer_forward(timer, timer-base-get_time(), kt); + return HRTIMER_RESTART; is hrtimer_forward really the right interface here? You are ignoring the number of overruns anyway, so hrtimer_start(,,) sounds more correct to me. According to Tom Gleixner, hrtimer_forward is a convenience function to move the expiry time of a timer forward in multiples of the interval, so it is in the future. After setting the expiry time you restart the timer either with [sic] a return HRTIMER_RESTART (if you are in the timer callback function). Ok, I see. Have you seen the timer actually coming in late, resulting in hrtimer_forward returning non-zero? I guess it's not a big problem for statistic data collection if that happens, but you might still want to be able to see it. I don't think there's much point in knowing if we have overrun(s). We're not going to schedule the timer any differently. We want to keep the timer interrupts as consistent as possible according to the user's request. + /* Since cpufreq_quick_get returns frequency in kHz, we use +* USEC_PER_SEC here vs NSEC_PER_SEC. +*/ + unsigned long nsPerCyc = (USEC_PER_SEC SCALE_SHIFT)/khzfreq; + profiling_interval = (nsPerCyc * cycles_reset) SCALE_SHIFT; + + pr_debug(timer resolution: %lu\n, +TICK_NSEC); Don't you need to adapt the profiling_interval at run time, when cpufreq changes the core frequency? You should probably use cpufreq_register_notifier() to update this. Since OProfile is a statistical profiler, the exact frequency is not critical. The user is going to be looking for hot spots in their code, so it's all relative. With that said, I don't imagine using the cpufreq notiication would be a big deal. We'll look at it. 
@@ -480,7 +491,22 @@ struct op_system_config *sys, int num_ctrs) { int i, j, cpu; + spu_cycle_reset = 0; + /* The cpufreq_quick_get function requires that cbe_cpufreq module +* be loaded. This function is not actually provided and exported +* by cbe_cpufreq, but it relies on cbe_cpufreq initialize kernel +* data structures. Since there's no way for depmod to realize +* that our OProfile module depends on cbe_cpufreq, we currently +* are letting the userspace tool, opcontrol, ensure that the +* cbe_cpufreq module is loaded. +*/ + khzfreq = cpufreq_quick_get(smp_processor_id()); You should probably have a fallback in here in case the cpufreq module is not loaded. There is a global variable ppc_proc_freq (in Hz) that you can access. Our userspace tool ensures the cpufreq module is loaded. You should not rely on user space tools to do the right thing in the kernel. Ok, we'll look at the fallback option you suggest. I don't recall if I even knew about ppc_proc_freq before or why I originally chose cpufreq_quick_get. Maybe we can do without the cpufreq and use ppc_proc_freq all the time. We'll see . . . Moreover, if the exact frequency is not that important, as you mentioned above, you can probably just hardcode a compile-time constant here. Well, exact frequency isn't critical, but it should, as close as possible, correspond with the user's requested value for spu cycle reset. + * + * Ideally, we would like to be able to create the cached_info for + * an SPU task just one time -- when libspe first loads the SPU + * binary file. We would store the cached_info in a list. Then, as + * SPU tasks are switched out and new ones switched in, the cached_info + * for inactive tasks would be kept, and the active one would be placed + * at the head of the list. But this technique may not work with + * current spufs functionality since the spu used in bind_context may + * be a different spu than was used in a previous bind_context for a + * reactivated SPU task.
Additionally, a reactivated SPU task may be + * assigned to run on a different physical SPE. We will investigate + * further if this can be done. + * + */ You should stuff a pointer to cached_info into struct spu_context, e.g. 'void *profile_private'. +struct cached_info { + vma_map_t * map; + struct spu * the_spu; + struct kref cache_ref; + struct list_head list; +}; And replace the 'the_spu' member with a back pointer to the spu_context if you need it. + +/* A data structure for cached information about active SPU tasks. + * Storage is dynamically allocated, sized as + * number of active nodes multiplied by 8. + * The info_list[n] member holds 0 or more + * 'struct cached_info' objects for SPU#=n. + * + * As currently implemented, there will only ever be one cached_info + * in the list for a given SPU. If we can devise
Re: [Cbe-oss-dev] [RFC, PATCH 4/4] Add support to OProfile for profiling Cell BE SPUs -- update
Benjamin Herrenschmidt wrote: I've given this some more thought, and I'm coming to the conclusion that a pure array-based implementation for holding cached_info (getting rid of the lists) would work well for the vast majority of cases in which OProfile will be used. Yes, it is true that the mapping of an SPU context to a physical spu-numbered array location cannot be guaranteed to stay valid, and that's why I discard the cached_info at that array location when the SPU task is switched out. Yes, it would be terribly inefficient if the same SPU task gets switched back in later and we would have to recreate the cached_info. However, I contend that OProfile users are interested in profiling one application at a time. They are not going to want to muddy the waters with multiple SPU apps running at the same time. I can't think of any reason why someone would consciously choose to do that. Any thoughts from the general community, especially OProfile users? Well, it's my understanding that quite a few typical usage scenarios involve different tasks running on different SPUs passing each other data around. That shouldn't be a problem. I would consider this to be "one large application" consisting of multiple SPU binaries running simultaneously. Such a scenario can be handled with no negative performance impact using a simple 16 element array of cached_info objects -- as long as there isn't (much) SPU task switching being done. -Maynard Ben. - To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [Cbe-oss-dev] [RFC, PATCH 4/4] Add support to OProfile for profiling Cell BE SPUs -- update
Christoph Hellwig wrote: On Tue, Jan 30, 2007 at 06:53:50PM +1100, Benjamin Herrenschmidt wrote: +/* Defines used for sync_start */ +#define SKIP_GENERIC_SYNC 0 +#define SYNC_START_ERROR -1 +#define DO_GENERIC_SYNC 1 + +typedef struct vma_map +{ + struct vma_map *next; + unsigned int vma; + unsigned int size; + unsigned int offset; + unsigned int guard_ptr; + unsigned int guard_val; +} vma_map_t; I haven't had time to look in details yet but in that context, what does "vma" stands for ? There's already an important vm data structure in linux routinely called "vma" and thus I suspect this is a poor naming choice as it will cause confusion. It looks like it actually is dealing with vma to me. But then again: - please don't use typedefs for structures - there might be a more descriptive name for this than just vma_map Yes, I'll come up with some (hopefully) better name. - Take Surveys. Earn Cash. Influence the Future of IT Join SourceForge.net's Techsay panel and you'll get the chance to share your opinions on IT & business topics through brief surveys - and earn cash http://www.techsay.com/default.php?page=join.php=sourceforge=DEVDEV ___ oprofile-list mailing list [EMAIL PROTECTED] https://lists.sourceforge.net/lists/listinfo/oprofile-list - To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [Cbe-oss-dev] [RFC, PATCH 4/4] Add support to OProfile for profiling Cell BE SPUs -- update
Maynard Johnson wrote: Arnd Bergmann wrote: On Monday 29 January 2007 20:48, Maynard Johnson wrote: Subject: Add support to OProfile for profiling Cell BE SPUs From: Maynard Johnson <[EMAIL PROTECTED]> This patch updates the existing arch/powerpc/oprofile/op_model_cell.c to add in the SPU profiling capabilities. In addition, a 'cell' subdirectory was added to arch/powerpc/oprofile to hold Cell-specific SPU profiling code. Signed-off-by: Carl Love <[EMAIL PROTECTED]> Signed-off-by: Maynard Johnson <[EMAIL PROTECTED]> [snip] + * + * Ideally, we would like to be able to create the cached_info for + * an SPU task just one time -- when libspe first loads the SPU + * binary file. We would store the cached_info in a list. Then, as + * SPU tasks are switched out and new ones switched in, the cached_info + * for inactive tasks would be kept, and the active one would be placed + * at the head of the list. But this technique may not with + * current spufs functionality since the spu used in bind_context may + * be a different spu than was used in a previous bind_context for a + * reactivated SPU task. Additionally, a reactivated SPU task may be + * assigned to run on a different physical SPE. We will investigate + * further if this can be done. + * + */ You should stuff a pointer to cached_info into struct spu_context, e.g. 'void *profile_private'. +struct cached_info { + vma_map_t * map; + struct spu * the_spu; + struct kref cache_ref; + struct list_head list; +}; And replace the 'the_spu' member with a back pointer to the spu_context if you need it. + +/* A data structure for cached information about active SPU tasks. + * Storage is dynamically allocated, sized as + * "number of active nodes multplied by 8". + * The info_list[n] member holds 0 or more + * 'struct cached_info' objects for SPU#=n. + * + * As currently implemented, there will only ever be one cached_info + * in the list for a given SPU. 
If we can devise a way to maintain + * multiple cached_infos in our list, then it would make sense + * to also cache the dcookie representing the PPU task application. + * See above description of struct cached_info for more details. + */ +struct spu_info_stacks { + struct list_head * info_list; +}; Why do you store pointers to list_head structures? If you want to store lists, you should have a lists_head itself in here. info_list is an array of n lists, where n is the number of SPUs. Why do you store them per spu in the first place? The physical spu doesn't have any relevance to this at all, the only data that is per spu is the sample data collected on a profiling interrupt, which you can then copy in the per-context data on a context switch. The sample data is written out to the event buffer on every profiling interrupt. But we don't write out the SPU program counter samples directly to the event buffer. First, we have to find the cached_info for the appropriate SPU context to retrieve the cached vma-to-fileoffset map. Then we do the vma_map_lookup to find the fileoffset corresponding to the SPU PC sample, which we then write out to the event buffer. This is one of the most time-critical pieces of the SPU profiling code, so I used an array to hold the cached_info for fast random access. But as I stated in a code comment above, the negative implication of this current implementation is that the array can only hold the cached_info for currently running SPU tasks. I need to give this some more thought. I've given this some more thought, and I'm coming to the conclusion that a pure array-based implementation for holding cached_info (getting rid of the lists) would work well for the vast majority of cases in which OProfile will be used. Yes, it is true that the mapping of an SPU context to a phsyical spu-numbered array location cannot be guaranteed to stay valid, and that's why I discard the cached_info at that array location when the SPU task is switched out. 
Yes, it would be terribly inefficient if the same SPU task gets switched back in later and we would have to recreate the cached_info. However, I contend that OProfile users are interested in profiling one application at a time. They are not going to want to muddy the waters with multiple SPU apps running at the same time. I can't think of any reason why someone would conscisouly choose to do that. Any thoughts from the general community, especially OProfile users? Thanks. -Maynard [snip] - To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [Cbe-oss-dev] [RFC, PATCH 4/4] Add support to OProfile for profiling Cell BE SPUs -- update
Arnd Bergmann wrote: On Monday 29 January 2007 20:48, Maynard Johnson wrote: Subject: Add support to OProfile for profiling Cell BE SPUs From: Maynard Johnson <[EMAIL PROTECTED]> This patch updates the existing arch/powerpc/oprofile/op_model_cell.c to add in the SPU profiling capabilities. In addition, a 'cell' subdirectory was added to arch/powerpc/oprofile to hold Cell-specific SPU profiling code. Signed-off-by: Carl Love <[EMAIL PROTECTED]> Signed-off-by: Maynard Johnson <[EMAIL PROTECTED]> I can't really say much about the common oprofile files that you are touching, maybe someone from oprofile-list (Philippe?) to look over them and ack/nack them. Anton (added to cc list) may be another good reviewer of drivers/oprofile changes. +#define number_of_online_nodes(nodes) {\ +u32 cpu; u32 tmp; \ +nodes = 0; \ +for_each_online_cpu(cpu) { \ +tmp = cbe_cpu_to_node(cpu) + 1;\ +if (tmp > nodes) \ +nodes++; \ + } \ +} I've been discussing with benh about a better way to do this. We should change all the references to nodes and cpu numbers to something more correct in the future, so we get rid of the assumption that each numa node is a cell chip. It's probably best to leave your code as is for now, but we need to remember this one when cleaning it up. You should however convert this into an inline function of prototype static inline int number_of_online_nodes(void) instead of a macro. OK. +/* Defines used for sync_start */ +#define SKIP_GENERIC_SYNC 0 +#define SYNC_START_ERROR -1 +#define DO_GENERIC_SYNC 1 + +typedef struct vma_map +{ + struct vma_map *next; + unsigned int vma; + unsigned int size; + unsigned int offset; + unsigned int guard_ptr; + unsigned int guard_val; +} vma_map_t; please don't typedef structures. Sure. [snip] + +static int spu_prof_running = 0; +static unsigned int profiling_interval = 0; + +extern int num_nodes; +extern unsigned int khzfreq; You really can't have global variable with such generic names. 
For static variables, it's less of a problem, since they are not in the same name space, but for easier debugging, it's good to always have the name of your module (e.g. spu_prof_) as a prefix to the identifier. OK, we'll add 'spu_prof' prefix to them. Of course, the best way would be to avoid global and static variables entirely, but that's not always possible. [snip] + // process the collected SPU PC for each node + for_each_online_cpu(cpu) { + if (cbe_get_hw_thread_id(cpu)) + continue; + + node = cbe_cpu_to_node(cpu); + node_factor = node * SPUS_PER_NODE; +/* number of valid entries for this node */ + entry = 0; + + trace_addr = cbe_read_pm(cpu, trace_address); + while ((trace_addr & CBE_PM_TRACE_BUF_EMPTY) != 0x400) + { + /* there is data in the trace buffer to process */ + cbe_read_trace_buffer(cpu, trace_buffer); + spu_mask = 0x; + + /* Each SPU PC is 16 bits; hence, four spus in each of +* the two 64-bit buffer entries that make up the +* 128-bit trace_buffer entry. Process the upper and +* lower 64-bit values simultaneously. +*/ + for (spu = 0; spu < SPUS_PER_TB_ENTRY; spu++) { + spu_pc_lower = spu_mask & trace_buffer[0]; + spu_pc_lower = spu_pc_lower >> (NUM_SPU_BITS_TRBUF + * (SPUS_PER_TB_ENTRY-spu-1)); + + spu_pc_upper = spu_mask & trace_buffer[1]; + spu_pc_upper = spu_pc_upper >> (NUM_SPU_BITS_TRBUF + * (SPUS_PER_TB_ENTRY-spu-1)); + + spu_mask = spu_mask >> NUM_SPU_BITS_TRBUF; + + /* spu PC trace entry is upper 16 bits of the +* 18 bit SPU program counter +*/ + spu_pc_lower = spu_pc_lower << 2; + spu_pc_upper = spu_pc_upper << 2; + + samples[((node_factor + spu) * TRACE_ARRAY_SIZE) + entry] + = (u32) spu_pc_lower; + samples[((node_factor + spu + SPUS_PER_TB_ENTRY) * TRACE_ARRAY_SIZE) + e
Re: [Cbe-oss-dev] [RFC, PATCH 3/4] Add support to OProfile for profiling Cell BE SPUs -- update
Arnd Bergmann wrote: On Monday 29 January 2007 20:48, Maynard Johnson wrote: Subject: Enable SPU switch notification to detect currently active SPU tasks. From: Maynard Johnson <[EMAIL PROTECTED]> This patch adds to the capability of spu_switch_event_register so that the caller is also notified of currently active SPU tasks. It also exports spu_switch_event_register and spu_switch_event_unregister. Signed-off-by: Maynard Johnson <[EMAIL PROTECTED]> I looked through it again, and think I found a serious bug, but that should be easy enough to solve: +static void notify_spus_active(void) +{ + int node; + /* Wake up the active spu_contexts. When the awakened processes +* sees their notify_active flag is set, they will call +* spu_switch_notify(); +*/ + for (node = 0; node < MAX_NUMNODES; node++) { + struct spu *spu; + mutex_lock(_prio->active_mutex[node]); +list_for_each_entry(spu, _prio->active_list[node], list) { + struct spu_context *ctx = spu->ctx; [side note] There is a small whitespace breakage in here, please make sure you always use tabs for indenting, not space characters. [/side note] @@ -45,9 +45,10 @@ u64 pte_fault; *stat = ctx->ops->status_read(ctx); - if (ctx->state != SPU_STATE_RUNNABLE) - return 1; + spu = ctx->spu; + if (ctx->state != SPU_STATE_RUNNABLE || spu->notify_active) + return 1; pte_fault = spu->dsisr & (MFC_DSISR_PTE_NOT_FOUND | MFC_DSISR_ACCESS_DENIED); return (!(*stat & 0x1) || pte_fault || spu->class_0_pending) ? 
1 : 0; @@ -305,6 +306,7 @@ u32 *npc, u32 *event) { int ret; + struct spu * spu; u32 status; if (down_interruptible(>run_sema)) @@ -318,8 +320,16 @@ do { ret = spufs_wait(ctx->stop_wq, spu_stopped(ctx, )); + spu = ctx->spu; if (unlikely(ret)) break; + if (unlikely(spu->notify_active)) { + spu->notify_active = 0; + if (!(status & SPU_STATUS_STOPPED_BY_STOP)) { + spu_switch_notify(spu, ctx); + continue; + } + } This is before spu_reacquire_runnable, so in case the spu got preempted at the same time when oprofile was enabled, ctx->spu is NULL, and you can't load the notify_active flag from it. On solution would be to move the notify_active flag from ctx->spu into ctx itself, but maybe there are other ways to solve this. In an earlier review of this patch, Christopher Hellwig suggested I move the notify_active flag to be a bit in the sched_flags field that's added in his scheduler patch series. If this patch series will be a available in an "Arnd" tree that we'll be using for our current OProfile development, perhaps I should wait until that time to change this, since the window of vulnerability is quite small. What do you think? -Maynard Thanks, Arnd <>< - To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [Cbe-oss-dev] [RFC, PATCH 3/4] Add support to OProfile for profiling Cell BE SPUs -- update
Arnd Bergmann wrote: On Monday 29 January 2007 20:48, Maynard Johnson wrote: Subject: Enable SPU switch notification to detect currently active SPU tasks. From: Maynard Johnson [EMAIL PROTECTED] This patch adds to the capability of spu_switch_event_register so that the caller is also notified of currently active SPU tasks. It also exports spu_switch_event_register and spu_switch_event_unregister. Signed-off-by: Maynard Johnson [EMAIL PROTECTED] I looked through it again, and think I found a serious bug, but that should be easy enough to solve: +static void notify_spus_active(void) +{ + int node; + /* Wake up the active spu_contexts. When the awakened processes +* sees their notify_active flag is set, they will call +* spu_switch_notify(); +*/ + for (node = 0; node MAX_NUMNODES; node++) { + struct spu *spu; + mutex_lock(spu_prio-active_mutex[node]); +list_for_each_entry(spu, spu_prio-active_list[node], list) { + struct spu_context *ctx = spu-ctx; [side note] There is a small whitespace breakage in here, please make sure you always use tabs for indenting, not space characters. [/side note] @@ -45,9 +45,10 @@ u64 pte_fault; *stat = ctx-ops-status_read(ctx); - if (ctx-state != SPU_STATE_RUNNABLE) - return 1; + spu = ctx-spu; + if (ctx-state != SPU_STATE_RUNNABLE || spu-notify_active) + return 1; pte_fault = spu-dsisr (MFC_DSISR_PTE_NOT_FOUND | MFC_DSISR_ACCESS_DENIED); return (!(*stat 0x1) || pte_fault || spu-class_0_pending) ? 
1 : 0; @@ -305,6 +306,7 @@ u32 *npc, u32 *event) { int ret; + struct spu * spu; u32 status; if (down_interruptible(ctx-run_sema)) @@ -318,8 +320,16 @@ do { ret = spufs_wait(ctx-stop_wq, spu_stopped(ctx, status)); + spu = ctx-spu; if (unlikely(ret)) break; + if (unlikely(spu-notify_active)) { + spu-notify_active = 0; + if (!(status SPU_STATUS_STOPPED_BY_STOP)) { + spu_switch_notify(spu, ctx); + continue; + } + } This is before spu_reacquire_runnable, so in case the spu got preempted at the same time when oprofile was enabled, ctx-spu is NULL, and you can't load the notify_active flag from it. On solution would be to move the notify_active flag from ctx-spu into ctx itself, but maybe there are other ways to solve this. In an earlier review of this patch, Christopher Hellwig suggested I move the notify_active flag to be a bit in the sched_flags field that's added in his scheduler patch series. If this patch series will be a available in an Arnd tree that we'll be using for our current OProfile development, perhaps I should wait until that time to change this, since the window of vulnerability is quite small. What do you think? -Maynard Thanks, Arnd - To unsubscribe from this list: send the line unsubscribe linux-kernel in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [Cbe-oss-dev] [RFC, PATCH 4/4] Add support to OProfile for profiling Cell BE SPUs -- update
Arnd Bergmann wrote: On Monday 29 January 2007 20:48, Maynard Johnson wrote: Subject: Add support to OProfile for profiling Cell BE SPUs From: Maynard Johnson [EMAIL PROTECTED] This patch updates the existing arch/powerpc/oprofile/op_model_cell.c to add in the SPU profiling capabilities. In addition, a 'cell' subdirectory was added to arch/powerpc/oprofile to hold Cell-specific SPU profiling code. Signed-off-by: Carl Love [EMAIL PROTECTED] Signed-off-by: Maynard Johnson [EMAIL PROTECTED] I can't really say much about the common oprofile files that you are touching, maybe someone from oprofile-list (Philippe?) to look over them and ack/nack them. Anton (added to cc list) may be another good reviewer of drivers/oprofile changes. +#define number_of_online_nodes(nodes) {\ +u32 cpu; u32 tmp; \ +nodes = 0; \ +for_each_online_cpu(cpu) { \ +tmp = cbe_cpu_to_node(cpu) + 1;\ +if (tmp nodes) \ +nodes++; \ + } \ +} I've been discussing with benh about a better way to do this. We should change all the references to nodes and cpu numbers to something more correct in the future, so we get rid of the assumption that each numa node is a cell chip. It's probably best to leave your code as is for now, but we need to remember this one when cleaning it up. You should however convert this into an inline function of prototype static inline int number_of_online_nodes(void) instead of a macro. OK. +/* Defines used for sync_start */ +#define SKIP_GENERIC_SYNC 0 +#define SYNC_START_ERROR -1 +#define DO_GENERIC_SYNC 1 + +typedef struct vma_map +{ + struct vma_map *next; + unsigned int vma; + unsigned int size; + unsigned int offset; + unsigned int guard_ptr; + unsigned int guard_val; +} vma_map_t; please don't typedef structures. Sure. [snip] + +static int spu_prof_running = 0; +static unsigned int profiling_interval = 0; + +extern int num_nodes; +extern unsigned int khzfreq; You really can't have global variable with such generic names. 
For static variables, it's less of a problem, since they are not in the same name space, but for easier debugging, it's good to always have the name of your module (e.g. spu_prof_) as a prefix to the identifier. OK, we'll add 'spu_prof' prefix to them. Of course, the best way would be to avoid global and static variables entirely, but that's not always possible. [snip] + // process the collected SPU PC for each node + for_each_online_cpu(cpu) { + if (cbe_get_hw_thread_id(cpu)) + continue; + + node = cbe_cpu_to_node(cpu); + node_factor = node * SPUS_PER_NODE; +/* number of valid entries for this node */ + entry = 0; + + trace_addr = cbe_read_pm(cpu, trace_address); + while ((trace_addr CBE_PM_TRACE_BUF_EMPTY) != 0x400) + { + /* there is data in the trace buffer to process */ + cbe_read_trace_buffer(cpu, trace_buffer); + spu_mask = 0x; + + /* Each SPU PC is 16 bits; hence, four spus in each of +* the two 64-bit buffer entries that make up the +* 128-bit trace_buffer entry. Process the upper and +* lower 64-bit values simultaneously. +*/ + for (spu = 0; spu SPUS_PER_TB_ENTRY; spu++) { + spu_pc_lower = spu_mask trace_buffer[0]; + spu_pc_lower = spu_pc_lower (NUM_SPU_BITS_TRBUF + * (SPUS_PER_TB_ENTRY-spu-1)); + + spu_pc_upper = spu_mask trace_buffer[1]; + spu_pc_upper = spu_pc_upper (NUM_SPU_BITS_TRBUF + * (SPUS_PER_TB_ENTRY-spu-1)); + + spu_mask = spu_mask NUM_SPU_BITS_TRBUF; + + /* spu PC trace entry is upper 16 bits of the +* 18 bit SPU program counter +*/ + spu_pc_lower = spu_pc_lower 2; + spu_pc_upper = spu_pc_upper 2; + + samples[((node_factor + spu) * TRACE_ARRAY_SIZE) + entry] + = (u32) spu_pc_lower; + samples[((node_factor + spu + SPUS_PER_TB_ENTRY) * TRACE_ARRAY_SIZE) + entry] + = (u32) spu_pc_upper
Re: [Cbe-oss-dev] [RFC, PATCH 4/4] Add support to OProfile for profiling Cell BE SPUs -- update
Maynard Johnson wrote: Arnd Bergmann wrote: On Monday 29 January 2007 20:48, Maynard Johnson wrote: Subject: Add support to OProfile for profiling Cell BE SPUs From: Maynard Johnson [EMAIL PROTECTED] This patch updates the existing arch/powerpc/oprofile/op_model_cell.c to add in the SPU profiling capabilities. In addition, a 'cell' subdirectory was added to arch/powerpc/oprofile to hold Cell-specific SPU profiling code. Signed-off-by: Carl Love [EMAIL PROTECTED] Signed-off-by: Maynard Johnson [EMAIL PROTECTED] [snip] + * + * Ideally, we would like to be able to create the cached_info for + * an SPU task just one time -- when libspe first loads the SPU + * binary file. We would store the cached_info in a list. Then, as + * SPU tasks are switched out and new ones switched in, the cached_info + * for inactive tasks would be kept, and the active one would be placed + * at the head of the list. But this technique may not with + * current spufs functionality since the spu used in bind_context may + * be a different spu than was used in a previous bind_context for a + * reactivated SPU task. Additionally, a reactivated SPU task may be + * assigned to run on a different physical SPE. We will investigate + * further if this can be done. + * + */ You should stuff a pointer to cached_info into struct spu_context, e.g. 'void *profile_private'. +struct cached_info { + vma_map_t * map; + struct spu * the_spu; + struct kref cache_ref; + struct list_head list; +}; And replace the 'the_spu' member with a back pointer to the spu_context if you need it. + +/* A data structure for cached information about active SPU tasks. + * Storage is dynamically allocated, sized as + * number of active nodes multplied by 8. + * The info_list[n] member holds 0 or more + * 'struct cached_info' objects for SPU#=n. + * + * As currently implemented, there will only ever be one cached_info + * in the list for a given SPU. 
If we can devise a way to maintain + * multiple cached_infos in our list, then it would make sense + * to also cache the dcookie representing the PPU task application. + * See above description of struct cached_info for more details. + */ +struct spu_info_stacks { + struct list_head * info_list; +}; Why do you store pointers to list_head structures? If you want to store lists, you should have a lists_head itself in here. info_list is an array of n lists, where n is the number of SPUs. Why do you store them per spu in the first place? The physical spu doesn't have any relevance to this at all, the only data that is per spu is the sample data collected on a profiling interrupt, which you can then copy in the per-context data on a context switch. The sample data is written out to the event buffer on every profiling interrupt. But we don't write out the SPU program counter samples directly to the event buffer. First, we have to find the cached_info for the appropriate SPU context to retrieve the cached vma-to-fileoffset map. Then we do the vma_map_lookup to find the fileoffset corresponding to the SPU PC sample, which we then write out to the event buffer. This is one of the most time-critical pieces of the SPU profiling code, so I used an array to hold the cached_info for fast random access. But as I stated in a code comment above, the negative implication of this current implementation is that the array can only hold the cached_info for currently running SPU tasks. I need to give this some more thought. I've given this some more thought, and I'm coming to the conclusion that a pure array-based implementation for holding cached_info (getting rid of the lists) would work well for the vast majority of cases in which OProfile will be used. Yes, it is true that the mapping of an SPU context to a phsyical spu-numbered array location cannot be guaranteed to stay valid, and that's why I discard the cached_info at that array location when the SPU task is switched out. 
Yes, it would be terribly inefficient if the same SPU task gets switched back in later and we would have to recreate the cached_info. However, I contend that OProfile users are interested in profiling one application at a time. They are not going to want to muddy the waters with multiple SPU apps running at the same time. I can't think of any reason why someone would conscisouly choose to do that. Any thoughts from the general community, especially OProfile users? Thanks. -Maynard [snip] - To unsubscribe from this list: send the line unsubscribe linux-kernel in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [Cbe-oss-dev] [RFC, PATCH 4/4] Add support to OProfile for profiling Cell BE SPUs -- update
Christoph Hellwig wrote: On Tue, Jan 30, 2007 at 06:53:50PM +1100, Benjamin Herrenschmidt wrote: +/* Defines used for sync_start */ +#define SKIP_GENERIC_SYNC 0 +#define SYNC_START_ERROR -1 +#define DO_GENERIC_SYNC 1 + +typedef struct vma_map +{ + struct vma_map *next; + unsigned int vma; + unsigned int size; + unsigned int offset; + unsigned int guard_ptr; + unsigned int guard_val; +} vma_map_t; I haven't had time to look in details yet but in that context, what does vma stands for ? There's already an important vm data structure in linux routinely called vma and thus I suspect this is a poor naming choice as it will cause confusion. It looks like it actually is dealing with vma to me. But then again: - please don't use typedefs for structures - there might be a more descriptive name for this than just vma_map Yes, I'll come up with some (hopefully) better name. - Take Surveys. Earn Cash. Influence the Future of IT Join SourceForge.net's Techsay panel and you'll get the chance to share your opinions on IT business topics through brief surveys - and earn cash http://www.techsay.com/default.php?page=join.phpp=sourceforgeCID=DEVDEV ___ oprofile-list mailing list [EMAIL PROTECTED] https://lists.sourceforge.net/lists/listinfo/oprofile-list - To unsubscribe from this list: send the line unsubscribe linux-kernel in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [Cbe-oss-dev] [RFC, PATCH 4/4] Add support to OProfile for profiling Cell BE SPUs -- update
Benjamin Herrenschmidt wrote: I've given this some more thought, and I'm coming to the conclusion that a pure array-based implementation for holding cached_info (getting rid of the lists) would work well for the vast majority of cases in which OProfile will be used. Yes, it is true that the mapping of an SPU context to a phsyical spu-numbered array location cannot be guaranteed to stay valid, and that's why I discard the cached_info at that array location when the SPU task is switched out. Yes, it would be terribly inefficient if the same SPU task gets switched back in later and we would have to recreate the cached_info. However, I contend that OProfile users are interested in profiling one application at a time. They are not going to want to muddy the waters with multiple SPU apps running at the same time. I can't think of any reason why someone would conscisouly choose to do that. Any thoughts from the general community, especially OProfile users? Well, it's my understanding that quite a few typical usage scenario involve different tasks running on different SPUs passing each other data around. That shouldn't be a problem. I would consider this to be one large application consisting of multiple SPU binaries running simultaneously. Such a scenario can be handled with no negative performance impact using a simple 16 element array of cached_info objects -- as long as there isn't (much) SPU task switching being done. -Maynard Ben. - To unsubscribe from this list: send the line unsubscribe linux-kernel in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[RFC, PATCH 4/4] Add support to OProfile for profiling Cell BE SPUs -- update
Subject: Add support to OProfile for profiling Cell BE SPUs From: Maynard Johnson <[EMAIL PROTECTED]> This patch updates the existing arch/powerpc/oprofile/op_model_cell.c to add in the SPU profiling capabilities. In addition, a 'cell' subdirectory was added to arch/powerpc/oprofile to hold Cell-specific SPU profiling code. Signed-off-by: Carl Love <[EMAIL PROTECTED]> Signed-off-by: Maynard Johnson <[EMAIL PROTECTED]> Index: linux-2.6.20-rc1/arch/powerpc/configs/cell_defconfig === --- linux-2.6.20-rc1.orig/arch/powerpc/configs/cell_defconfig 2007-01-18 16:43:14.230540320 -0600 +++ linux-2.6.20-rc1/arch/powerpc/configs/cell_defconfig 2007-01-29 10:32:03.386789608 -0600 @@ -1403,7 +1403,7 @@ # Instrumentation Support # CONFIG_PROFILING=y -CONFIG_OPROFILE=y +CONFIG_OPROFILE=m # CONFIG_KPROBES is not set # Index: linux-2.6.20-rc1/arch/powerpc/oprofile/cell/pr_util.h === --- /dev/null 1970-01-01 00:00:00.0 + +++ linux-2.6.20-rc1/arch/powerpc/oprofile/cell/pr_util.h 2007-01-29 10:32:03.388789304 -0600 @@ -0,0 +1,75 @@ + /* + * Cell Broadband Engine OProfile Support + * + * (C) Copyright IBM Corporation 2006 + * + * Author: Maynard Johnson <[EMAIL PROTECTED]> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. 
+ */ + +#ifndef PR_UTIL_H +#define PR_UTIL_H + +#include +#include +#include +#include + +#define number_of_online_nodes(nodes) {\ +u32 cpu; u32 tmp; \ +nodes = 0; \ +for_each_online_cpu(cpu) { \ +tmp = cbe_cpu_to_node(cpu) + 1;\ +if (tmp > nodes) \ +nodes++; \ + } \ +} + + +/* Defines used for sync_start */ +#define SKIP_GENERIC_SYNC 0 +#define SYNC_START_ERROR -1 +#define DO_GENERIC_SYNC 1 + +typedef struct vma_map +{ + struct vma_map *next; + unsigned int vma; + unsigned int size; + unsigned int offset; + unsigned int guard_ptr; + unsigned int guard_val; +} vma_map_t; + +/* The three functions below are for maintaining and accessing + * the vma-to-file offset map. + */ +vma_map_t * create_vma_map(const struct spu * spu, u64 objectid); +unsigned int vma_map_lookup(vma_map_t *map, unsigned int vma, + const struct spu * aSpu); +void vma_map_free(struct vma_map *map); + +/* + * Entry point for SPU profiling. + * cycles_reset is the SPU_CYCLES count value specified by the user. + */ +void start_spu_profiling(unsigned int cycles_reset); + +void stop_spu_profiling(void); + + +/* add the necessary profiling hooks */ +int spu_sync_start(void); + +/* remove the hooks */ +int spu_sync_stop(void); + +/* Record SPU program counter samples to the oprofile event buffer. 
*/ +void spu_sync_buffer(int spu_num, unsigned int * samples, + int num_samples); + +#endif// PR_UTIL_H Index: linux-2.6.20-rc1/arch/powerpc/oprofile/cell/spu_profiler.c === --- /dev/null 1970-01-01 00:00:00.0 + +++ linux-2.6.20-rc1/arch/powerpc/oprofile/cell/spu_profiler.c 2007-01-29 10:32:03.392788696 -0600 @@ -0,0 +1,204 @@ +/* + * Cell Broadband Engine OProfile Support + * + * (C) Copyright IBM Corporation 2006 + * + * Authors: Maynard Johnson <[EMAIL PROTECTED]> + * Carl Love <[EMAIL PROTECTED]> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include +#include +#include +#include +#include "pr_util.h" + +#define TRACE_ARRAY_SIZE 1024 +static u32 * samples; +static u32 * samples_per_node; + +static int spu_prof_running = 0; +static unsigned int profiling_interval = 0; + +extern int num_nodes; +extern unsigned int khzfreq; + +/* + * Oprofile setup functions + */ + +#define NUM_SPU_BITS_TRBUF 16 +#define SPUS_PER_TB_ENTRY 4 +#define SPUS_PER_NODE 8 + +/* + * Collect the SPU program counter samples from the trace buffer. + * The global variable usage is as follows: + *samples[][TRACE_ARRAY_SIZE] - array to store SPU PC samples + * Assumption, the array will be all zeros on entry. + *u32 samples_per_node[num_nodes] - array of how many valid samples per node + */ +static void cell_spu_pc_collection(void) +{ + int cpu; + int node; + int spu; + u32 trace_addr; +/* the trace buffer is 128 bits */ + u64 trace_buffer[2]; + u64 spu_pc_lower; + u64 spu_pc_upper; + u64 spu_mask; + i
[RFC, PATCH 3/4] Add support to OProfile for profiling Cell BE SPUs -- update
Subject: Enable SPU switch notification to detect currently active SPU tasks. From: Maynard Johnson <[EMAIL PROTECTED]> This patch adds to the capability of spu_switch_event_register so that the caller is also notified of currently active SPU tasks. It also exports spu_switch_event_register and spu_switch_event_unregister. Signed-off-by: Maynard Johnson <[EMAIL PROTECTED]> Index: linux-2.6.20-rc1/arch/powerpc/platforms/cell/spufs/sched.c === --- linux-2.6.20-rc1.orig/arch/powerpc/platforms/cell/spufs/sched.c 2007-01-18 16:43:14.324526032 -0600 +++ linux-2.6.20-rc1/arch/powerpc/platforms/cell/spufs/sched.c 2007-01-26 16:16:35.219668640 -0600 @@ -78,21 +78,46 @@ static BLOCKING_NOTIFIER_HEAD(spu_switch_notifier); -static void spu_switch_notify(struct spu *spu, struct spu_context *ctx) +void spu_switch_notify(struct spu *spu, struct spu_context *ctx) { blocking_notifier_call_chain(_switch_notifier, ctx ? ctx->object_id : 0, spu); } +static void notify_spus_active(void) +{ + int node; + /* Wake up the active spu_contexts. 
When the awakened processes + * sees their notify_active flag is set, they will call + * spu_switch_notify(); + */ + for (node = 0; node < MAX_NUMNODES; node++) { + struct spu *spu; + mutex_lock(_prio->active_mutex[node]); +list_for_each_entry(spu, _prio->active_list[node], list) { + struct spu_context *ctx = spu->ctx; + spu->notify_active = 1; + wake_up_all(>stop_wq); + } +mutex_unlock(_prio->active_mutex[node]); + } +} + int spu_switch_event_register(struct notifier_block * n) { - return blocking_notifier_chain_register(_switch_notifier, n); + int ret; + ret = blocking_notifier_chain_register(_switch_notifier, n); + if (!ret) + notify_spus_active(); + return ret; } +EXPORT_SYMBOL_GPL(spu_switch_event_register); int spu_switch_event_unregister(struct notifier_block * n) { return blocking_notifier_chain_unregister(_switch_notifier, n); } +EXPORT_SYMBOL_GPL(spu_switch_event_unregister); static inline void bind_context(struct spu *spu, struct spu_context *ctx) Index: linux-2.6.20-rc1/arch/powerpc/platforms/cell/spufs/spufs.h === --- linux-2.6.20-rc1.orig/arch/powerpc/platforms/cell/spufs/spufs.h 2007-01-18 16:43:14.340523600 -0600 +++ linux-2.6.20-rc1/arch/powerpc/platforms/cell/spufs/spufs.h 2007-01-26 16:26:49.733703448 -0600 @@ -180,6 +180,7 @@ int spu_activate(struct spu_context *ctx, u64 flags); void spu_deactivate(struct spu_context *ctx); void spu_yield(struct spu_context *ctx); +void spu_switch_notify(struct spu *spu, struct spu_context *ctx); int __init spu_sched_init(void); void __exit spu_sched_exit(void); Index: linux-2.6.20-rc1/arch/powerpc/platforms/cell/spufs/run.c === --- linux-2.6.20-rc1.orig/arch/powerpc/platforms/cell/spufs/run.c 2007-01-18 16:43:14.340523600 -0600 +++ linux-2.6.20-rc1/arch/powerpc/platforms/cell/spufs/run.c 2007-01-26 16:24:38.979744856 -0600 @@ -45,9 +45,10 @@ u64 pte_fault; *stat = ctx->ops->status_read(ctx); - if (ctx->state != SPU_STATE_RUNNABLE) - return 1; + spu = ctx->spu; + if (ctx->state != SPU_STATE_RUNNABLE || 
spu->notify_active) + return 1; pte_fault = spu->dsisr & (MFC_DSISR_PTE_NOT_FOUND | MFC_DSISR_ACCESS_DENIED); return (!(*stat & 0x1) || pte_fault || spu->class_0_pending) ? 1 : 0; @@ -305,6 +306,7 @@ u32 *npc, u32 *event) { int ret; + struct spu * spu; u32 status; if (down_interruptible(>run_sema)) @@ -318,8 +320,16 @@ do { ret = spufs_wait(ctx->stop_wq, spu_stopped(ctx, )); + spu = ctx->spu; if (unlikely(ret)) break; + if (unlikely(spu->notify_active)) { + spu->notify_active = 0; + if (!(status & SPU_STATUS_STOPPED_BY_STOP)) { +spu_switch_notify(spu, ctx); +continue; + } + } if ((status & SPU_STATUS_STOPPED_BY_STOP) && (status >> SPU_STOP_STATUS_SHIFT == 0x2104)) { ret = spu_process_callback(ctx); Index: linux-2.6.20-rc1/include/asm-powerpc/spu.h === --- linux-2.6.20-rc1.orig/include/asm-powerpc/spu.h 2007-01-18 16:43:19.932605072 -0600 +++ linux-2.6.20-rc1/include/asm-powerpc/spu.h 2007-01-24 12:17:30.209676992 -0600 @@ -144,6 +144,7 @@ void* pdata; /* platform private data */ struct sys_device sysdev; + int notify_active; }; struct spu *spu_alloc(void);
[RFC, PATCH 2/4] Add support to OProfile for profiling Cell BE SPUs -- update
The code was setting up the debug bus for group 21 when profiling on the event PPU CYCLES. The debug bus is not actually used by the hardware performance counters when counting PPU CYCLES. Setting up the debug bus for PPU CYCLES causes signal routing conflicts on the debug bus when profiling PPU cycles and another PPU event. This patch fixes the code to only setup the debug bus to route the performance signals for the non PPU CYCLE events. Signed-off-by: Maynard Johnson <[EMAIL PROTECTED]> Signed-off-by: Carl Love <[EMAIL PROTECTED]> Index: linux-2.6.20-rc1/arch/powerpc/oprofile/op_model_cell.c === --- linux-2.6.20-rc1.orig/arch/powerpc/oprofile/op_model_cell.c 2007-01-18 16:56:47.300605984 -0600 +++ linux-2.6.20-rc1/arch/powerpc/oprofile/op_model_cell.c 2007-01-24 12:16:16.225609136 -0600 @@ -39,6 +39,9 @@ #include "../platforms/cell/interrupt.h" #define PPU_CYCLES_EVENT_NUM 1 /* event number for CYCLES */ +#define PPU_CYCLES_GRP_NUM 1 /* special group number for identifying + * PPU_CYCLES event + */ #define CBE_COUNT_ALL_CYCLES 0x4280 /* PPU cycle event specifier */ #define NUM_THREADS 2 /* number of physical threads in @@ -62,7 +65,7 @@ struct pm_signal { u16 cpu; /* Processor to modify */ u16 sub_unit; /* hw subunit this applies to (if applicable) */ - u16 signal_group; /* Signal Group to Enable/Disable */ + short int signal_group; /* Signal Group to Enable/Disable */ u8 bus_word; /* Enable/Disable on this Trace/Trigger/Event * Bus Word(s) (bitmask) */ @@ -173,26 +176,40 @@ static void pm_rtas_activate_signals(u32 node, u32 count) { int ret; - int j; + int i, j; struct pm_signal pm_signal_local[NR_PHYS_CTRS]; + /* There is no debug setup required for the cycles event. + * Note that only events in the same group can be used. + * Otherwise, there will be conflicts in correctly routing + * the signals on the debug bus. It is the responsiblity + * of the OProfile user tool to check the events are in + * the same group. 
+ */ + i = 0; for (j = 0; j < count; j++) { - /* fw expects physical cpu # */ - pm_signal_local[j].cpu = node; - pm_signal_local[j].signal_group - = pm_signal[j].signal_group; - pm_signal_local[j].bus_word = pm_signal[j].bus_word; - pm_signal_local[j].sub_unit = pm_signal[j].sub_unit; - pm_signal_local[j].bit = pm_signal[j].bit; - } + if (pm_signal[j].signal_group != PPU_CYCLES_GRP_NUM) { - ret = rtas_ibm_cbe_perftools(SUBFUNC_ACTIVATE, PASSTHRU_ENABLE, - pm_signal_local, - count * sizeof(struct pm_signal)); + /* fw expects physical cpu # */ + pm_signal_local[i].cpu = node; + pm_signal_local[i].signal_group += pm_signal[j].signal_group; + pm_signal_local[i].bus_word = pm_signal[j].bus_word; + pm_signal_local[i].sub_unit = pm_signal[j].sub_unit; + pm_signal_local[i].bit = pm_signal[j].bit; + i++; + } + } - if (ret) - printk(KERN_WARNING "%s: rtas returned: %d\n", - __FUNCTION__, ret); + if (i != 0) { + ret = rtas_ibm_cbe_perftools(SUBFUNC_ACTIVATE, PASSTHRU_ENABLE, + pm_signal_local, + i * sizeof(struct pm_signal)); + + if (ret) + printk(KERN_WARNING "%s: rtas returned: %d\n", + __FUNCTION__, ret); + } } /* @@ -209,7 +226,7 @@ /* Special Event: Count all cpu cycles */ pm_regs.pm07_cntrl[ctr] = CBE_COUNT_ALL_CYCLES; p = &(pm_signal[ctr]); - p->signal_group = 21; + p->signal_group = PPU_CYCLES_GRP_NUM; p->bus_word = 1; p->sub_unit = 0; p->bit = 0;
[RFC, PATCH 1/4] Add support to OProfile for profiling Cell BE SPUs -- update
This is a clean up patch that includes the following changes: -It removes some macro definitions that are only used once with the actual code. -Some comments were added to clarify the code based on feedback from the community. -The write_pm_cntrl() and set_count_mode() were passed a structure element from a global variable. The argument was removed so the functions now just operate on the global directly. -The set_pm_event() function call in the cell_virtual_cntr() routine was moved to a for-loop before the for_each_cpu loop Signed-off-by: Carl Love <[EMAIL PROTECTED]> Signed-off-by: Maynard Johnson <[EMAIL PROTECTED]> Index: linux-2.6.20-rc1/arch/powerpc/oprofile/op_model_cell.c === --- linux-2.6.20-rc1.orig/arch/powerpc/oprofile/op_model_cell.c 2007-01-18 16:43:14.428510224 -0600 +++ linux-2.6.20-rc1/arch/powerpc/oprofile/op_model_cell.c 2007-01-18 16:56:47.300605984 -0600 @@ -41,8 +41,12 @@ #define PPU_CYCLES_EVENT_NUM 1 /* event number for CYCLES */ #define CBE_COUNT_ALL_CYCLES 0x4280 /* PPU cycle event specifier */ -#define NUM_THREADS 2 -#define VIRT_CNTR_SW_TIME_NS 1 // 0.5 seconds +#define NUM_THREADS 2 /* number of physical threads in + * physical processor + */ +#define NUM_TRACE_BUS_WORDS 4 +#define NUM_INPUT_BUS_WORDS 2 + struct pmc_cntrl_data { unsigned long vcntr; @@ -94,14 +98,6 @@ } pm_regs; -#define GET_SUB_UNIT(x) ((x & 0xf000) >> 12) -#define GET_BUS_WORD(x) ((x & 0x00f0) >> 4) -#define GET_BUS_TYPE(x) ((x & 0x0300) >> 8) -#define GET_POLARITY(x) ((x & 0x0002) >> 1) -#define GET_COUNT_CYCLES(x) (x & 0x0001) -#define GET_INPUT_CONTROL(x) ((x & 0x0004) >> 2) - - static DEFINE_PER_CPU(unsigned long[NR_PHYS_CTRS], pmc_values); static struct pmc_cntrl_data pmc_cntrl[NUM_THREADS][NR_PHYS_CTRS]; @@ -129,8 +125,8 @@ static u32 ctr_enabled; -static unsigned char trace_bus[4]; -static unsigned char input_bus[2]; +static unsigned char trace_bus[NUM_TRACE_BUS_WORDS]; +static unsigned char input_bus[NUM_INPUT_BUS_WORDS]; /* * Firmware interface functions @@ 
-183,7 +179,8 @@ for (j = 0; j < count; j++) { /* fw expects physical cpu # */ pm_signal_local[j].cpu = node; - pm_signal_local[j].signal_group = pm_signal[j].signal_group; + pm_signal_local[j].signal_group + = pm_signal[j].signal_group; pm_signal_local[j].bus_word = pm_signal[j].bus_word; pm_signal_local[j].sub_unit = pm_signal[j].sub_unit; pm_signal_local[j].bit = pm_signal[j].bit; @@ -221,24 +218,32 @@ pm_regs.pm07_cntrl[ctr] = 0; } - bus_word = GET_BUS_WORD(unit_mask); - bus_type = GET_BUS_TYPE(unit_mask); - count_cycles = GET_COUNT_CYCLES(unit_mask); - polarity = GET_POLARITY(unit_mask); - input_control = GET_INPUT_CONTROL(unit_mask); + bus_word = (unit_mask & 0x00f0) >> 4; + bus_type = (unit_mask & 0x0300) >> 8; + count_cycles = unit_mask & 0x0001; + polarity = (unit_mask & 0x0002) >> 1; + input_control = (unit_mask & 0x0004) >> 2; signal_bit = (event % 100); p = &(pm_signal[ctr]); p->signal_group = event / 100; p->bus_word = bus_word; - p->sub_unit = unit_mask & 0xf000; + p->sub_unit = (unit_mask & 0xf000) >> 12; pm_regs.pm07_cntrl[ctr] = 0; - pm_regs.pm07_cntrl[ctr] |= PM07_CTR_COUNT_CYCLES(count_cycles); - pm_regs.pm07_cntrl[ctr] |= PM07_CTR_POLARITY(polarity); - pm_regs.pm07_cntrl[ctr] |= PM07_CTR_INPUT_CONTROL(input_control); - + pm_regs.pm07_cntrl[ctr] |= (count_cycles & 1) << 23; + pm_regs.pm07_cntrl[ctr] |= (polarity & 1) << 24; + pm_regs.pm07_cntrl[ctr] |= (input_control & 1) << 25; + + /* Some of the islands signal selection is based on 64 bit words. + * The debug bus words are 32 bits, the input words to the performance + * counters are defined as 32 bits. Need to convert the 64 bit island + * specification to the appropriate 32 input bit and bus word for the + * performance counter event selection. See the CELL Performance + * monitoring signals manual and the Perf cntr hardware descriptions + * for the details. 
+ */ if (input_control == 0) { if (signal_bit > 31) { signal_bit -= 32; @@ -253,18 +258,18 @@ if ((bus_type == 1) && p->signal_group >= 50) bus_type = 0; - pm_regs.pm07_cntrl[ctr] |= PM07_CTR_INPUT_MUX(signal_bit); + pm_regs.pm07_cntrl[ctr] |= (signal_bit & 0x3F) << 26; } else { pm_regs.pm07_cntrl[ctr] = 0; p->bit = signal_bit; } - for (i = 0; i < 4; i++) { + for (i = 0; i < NUM_TRACE_BUS_WORDS; i++) { if (bus_word & (1 << i)) { pm_regs.debug_bus_control |= (bus_type << (31 - (2 * i) + 1)); - for (j = 0; j < 2; j++) { + for (j
[RFC, PATCH 0/4] Add support to OProfile for profiling Cell BE SPUs -- update
On December 14, 2006, I posted a patch that added support to the OProfile kernel driver for profiling Cell SPUs. There have been some changes/fixes to this patch since the original posting (including forward porting from 2.6.18-based kernel to 2.6.20-rc1), so I am reposting the patch for review now. This patch relies upon the following patches that have not been accepted yet: 1. oprofile cleanup patch (submitted on Nov 27) 2. Fix for PPU profiling (not submitted yet, since it depends on #1) 3. SPU task notification patch (last submitted on Jan 26) For those who may want to apply and build the oprofile SPU support patch, it would be necessary to first apply the above patches. For convenience, I will post all three of the above patches, along with the oprofile SPU support patch. Comments appreciated. Thank you. Maynard Johnson IBM LTC Toolchain - To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[RFC, PATCH 0/4] Add support to OProfile for profiling Cell BE SPUs -- update
On December 14, 2006, I posted a patch that added support to the OProfile kernel driver for profiling Cell SPUs. There have been some changes/fixes to this patch since the original posting (including forward porting from 2.6.18-based kernel to 2.6.20-rc1), so I am reposting the patch for review now. This patch relies upon the following patches that have not been accepted yet: 1. oprofile cleanup patch (submitted on Nov 27) 2. Fix for PPU profiling (not submitted yet, since it depends on #1) 3. SPU task notification patch (last submitted on Jan 26) For those who may want to apply and build the oprofile SPU support patch, it would be necessary to first apply the above patches. For convenience, I will post all three of the above patches, along with the oprofile SPU support patch. Comments appreciated. Thank you. Maynard Johnson IBM LTC Toolchain - To unsubscribe from this list: send the line unsubscribe linux-kernel in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[RFC, PATCH 1/4] Add support to OProfile for profiling Cell BE SPUs -- update
This is a clean up patch that includes the following changes: -It removes some macro definitions that are only used once with the actual code. -Some comments were added to clarify the code based on feedback from the community. -The write_pm_cntrl() and set_count_mode() were passed a structure element from a global variable. The argument was removed so the functions now just operate on the global directly. -The set_pm_event() function call in the cell_virtual_cntr() routine was moved to a for-loop before the for_each_cpu loop Signed-off-by: Carl Love [EMAIL PROTECTED] Signed-off-by: Maynard Johnson [EMAIL PROTECTED] Index: linux-2.6.20-rc1/arch/powerpc/oprofile/op_model_cell.c === --- linux-2.6.20-rc1.orig/arch/powerpc/oprofile/op_model_cell.c 2007-01-18 16:43:14.428510224 -0600 +++ linux-2.6.20-rc1/arch/powerpc/oprofile/op_model_cell.c 2007-01-18 16:56:47.300605984 -0600 @@ -41,8 +41,12 @@ #define PPU_CYCLES_EVENT_NUM 1 /* event number for CYCLES */ #define CBE_COUNT_ALL_CYCLES 0x4280 /* PPU cycle event specifier */ -#define NUM_THREADS 2 -#define VIRT_CNTR_SW_TIME_NS 1 // 0.5 seconds +#define NUM_THREADS 2 /* number of physical threads in + * physical processor + */ +#define NUM_TRACE_BUS_WORDS 4 +#define NUM_INPUT_BUS_WORDS 2 + struct pmc_cntrl_data { unsigned long vcntr; @@ -94,14 +98,6 @@ } pm_regs; -#define GET_SUB_UNIT(x) ((x 0xf000) 12) -#define GET_BUS_WORD(x) ((x 0x00f0) 4) -#define GET_BUS_TYPE(x) ((x 0x0300) 8) -#define GET_POLARITY(x) ((x 0x0002) 1) -#define GET_COUNT_CYCLES(x) (x 0x0001) -#define GET_INPUT_CONTROL(x) ((x 0x0004) 2) - - static DEFINE_PER_CPU(unsigned long[NR_PHYS_CTRS], pmc_values); static struct pmc_cntrl_data pmc_cntrl[NUM_THREADS][NR_PHYS_CTRS]; @@ -129,8 +125,8 @@ static u32 ctr_enabled; -static unsigned char trace_bus[4]; -static unsigned char input_bus[2]; +static unsigned char trace_bus[NUM_TRACE_BUS_WORDS]; +static unsigned char input_bus[NUM_INPUT_BUS_WORDS]; /* * Firmware interface functions @@ -183,7 +179,8 @@ for (j = 0; j 
count; j++) { /* fw expects physical cpu # */ pm_signal_local[j].cpu = node; - pm_signal_local[j].signal_group = pm_signal[j].signal_group; + pm_signal_local[j].signal_group + = pm_signal[j].signal_group; pm_signal_local[j].bus_word = pm_signal[j].bus_word; pm_signal_local[j].sub_unit = pm_signal[j].sub_unit; pm_signal_local[j].bit = pm_signal[j].bit; @@ -221,24 +218,32 @@ pm_regs.pm07_cntrl[ctr] = 0; } - bus_word = GET_BUS_WORD(unit_mask); - bus_type = GET_BUS_TYPE(unit_mask); - count_cycles = GET_COUNT_CYCLES(unit_mask); - polarity = GET_POLARITY(unit_mask); - input_control = GET_INPUT_CONTROL(unit_mask); + bus_word = (unit_mask 0x00f0) 4; + bus_type = (unit_mask 0x0300) 8; + count_cycles = unit_mask 0x0001; + polarity = (unit_mask 0x0002) 1; + input_control = (unit_mask 0x0004) 2; signal_bit = (event % 100); p = (pm_signal[ctr]); p-signal_group = event / 100; p-bus_word = bus_word; - p-sub_unit = unit_mask 0xf000; + p-sub_unit = (unit_mask 0xf000) 12; pm_regs.pm07_cntrl[ctr] = 0; - pm_regs.pm07_cntrl[ctr] |= PM07_CTR_COUNT_CYCLES(count_cycles); - pm_regs.pm07_cntrl[ctr] |= PM07_CTR_POLARITY(polarity); - pm_regs.pm07_cntrl[ctr] |= PM07_CTR_INPUT_CONTROL(input_control); - + pm_regs.pm07_cntrl[ctr] |= (count_cycles 1) 23; + pm_regs.pm07_cntrl[ctr] |= (polarity 1) 24; + pm_regs.pm07_cntrl[ctr] |= (input_control 1) 25; + + /* Some of the islands signal selection is based on 64 bit words. + * The debug bus words are 32 bits, the input words to the performance + * counters are defined as 32 bits. Need to convert the 64 bit island + * specification to the appropriate 32 input bit and bus word for the + * performance counter event selection. See the CELL Performance + * monitoring signals manual and the Perf cntr hardware descriptions + * for the details. 
+ */ if (input_control == 0) { if (signal_bit 31) { signal_bit -= 32; @@ -253,18 +258,18 @@ if ((bus_type == 1) p-signal_group = 50) bus_type = 0; - pm_regs.pm07_cntrl[ctr] |= PM07_CTR_INPUT_MUX(signal_bit); + pm_regs.pm07_cntrl[ctr] |= (signal_bit 0x3F) 26; } else { pm_regs.pm07_cntrl[ctr] = 0; p-bit = signal_bit; } - for (i = 0; i 4; i++) { + for (i = 0; i NUM_TRACE_BUS_WORDS; i++) { if (bus_word (1 i)) { pm_regs.debug_bus_control |= (bus_type (31 - (2 * i) + 1)); - for (j = 0; j 2; j++) { + for (j = 0; j NUM_INPUT_BUS_WORDS; j++) { if (input_bus[j] == 0xff) { input_bus[j] = i; pm_regs.group_control |= @@ -278,52 +283,58 @@ ; } -static void write_pm_cntrl(int cpu, struct pm_cntrl *pm_cntrl) +static void write_pm_cntrl(int cpu) { - /* Oprofile will use 32 bit counters
[RFC, PATCH 2/4] Add support to OProfile for profiling Cell BE SPUs -- update
The code was setting up the debug bus for group 21 when profiling on the event PPU CYCLES. The debug bus is not actually used by the hardware performance counters when counting PPU CYCLES. Setting up the debug bus for PPU CYCLES causes signal routing conflicts on the debug bus when profiling PPU cycles and another PPU event. This patch fixes the code to only setup the debug bus to route the performance signals for the non PPU CYCLE events. Signed-off-by: Maynard Johnson [EMAIL PROTECTED] Signed-off-by: Carl Love [EMAIL PROTECTED] Index: linux-2.6.20-rc1/arch/powerpc/oprofile/op_model_cell.c === --- linux-2.6.20-rc1.orig/arch/powerpc/oprofile/op_model_cell.c 2007-01-18 16:56:47.300605984 -0600 +++ linux-2.6.20-rc1/arch/powerpc/oprofile/op_model_cell.c 2007-01-24 12:16:16.225609136 -0600 @@ -39,6 +39,9 @@ #include ../platforms/cell/interrupt.h #define PPU_CYCLES_EVENT_NUM 1 /* event number for CYCLES */ +#define PPU_CYCLES_GRP_NUM 1 /* special group number for identifying + * PPU_CYCLES event + */ #define CBE_COUNT_ALL_CYCLES 0x4280 /* PPU cycle event specifier */ #define NUM_THREADS 2 /* number of physical threads in @@ -62,7 +65,7 @@ struct pm_signal { u16 cpu; /* Processor to modify */ u16 sub_unit; /* hw subunit this applies to (if applicable) */ - u16 signal_group; /* Signal Group to Enable/Disable */ + short int signal_group; /* Signal Group to Enable/Disable */ u8 bus_word; /* Enable/Disable on this Trace/Trigger/Event * Bus Word(s) (bitmask) */ @@ -173,26 +176,40 @@ static void pm_rtas_activate_signals(u32 node, u32 count) { int ret; - int j; + int i, j; struct pm_signal pm_signal_local[NR_PHYS_CTRS]; + /* There is no debug setup required for the cycles event. + * Note that only events in the same group can be used. + * Otherwise, there will be conflicts in correctly routing + * the signals on the debug bus. It is the responsiblity + * of the OProfile user tool to check the events are in + * the same group. 
+ */ + i = 0; for (j = 0; j count; j++) { - /* fw expects physical cpu # */ - pm_signal_local[j].cpu = node; - pm_signal_local[j].signal_group - = pm_signal[j].signal_group; - pm_signal_local[j].bus_word = pm_signal[j].bus_word; - pm_signal_local[j].sub_unit = pm_signal[j].sub_unit; - pm_signal_local[j].bit = pm_signal[j].bit; - } + if (pm_signal[j].signal_group != PPU_CYCLES_GRP_NUM) { - ret = rtas_ibm_cbe_perftools(SUBFUNC_ACTIVATE, PASSTHRU_ENABLE, - pm_signal_local, - count * sizeof(struct pm_signal)); + /* fw expects physical cpu # */ + pm_signal_local[i].cpu = node; + pm_signal_local[i].signal_group += pm_signal[j].signal_group; + pm_signal_local[i].bus_word = pm_signal[j].bus_word; + pm_signal_local[i].sub_unit = pm_signal[j].sub_unit; + pm_signal_local[i].bit = pm_signal[j].bit; + i++; + } + } - if (ret) - printk(KERN_WARNING %s: rtas returned: %d\n, - __FUNCTION__, ret); + if (i != 0) { + ret = rtas_ibm_cbe_perftools(SUBFUNC_ACTIVATE, PASSTHRU_ENABLE, + pm_signal_local, + i * sizeof(struct pm_signal)); + + if (ret) + printk(KERN_WARNING %s: rtas returned: %d\n, + __FUNCTION__, ret); + } } /* @@ -209,7 +226,7 @@ /* Special Event: Count all cpu cycles */ pm_regs.pm07_cntrl[ctr] = CBE_COUNT_ALL_CYCLES; p = (pm_signal[ctr]); - p-signal_group = 21; + p-signal_group = PPU_CYCLES_GRP_NUM; p-bus_word = 1; p-sub_unit = 0; p-bit = 0;
[RFC, PATCH 3/4] Add support to OProfile for profiling Cell BE SPUs -- update
Subject: Enable SPU switch notification to detect currently active SPU tasks. From: Maynard Johnson [EMAIL PROTECTED] This patch adds to the capability of spu_switch_event_register so that the caller is also notified of currently active SPU tasks. It also exports spu_switch_event_register and spu_switch_event_unregister. Signed-off-by: Maynard Johnson [EMAIL PROTECTED] Index: linux-2.6.20-rc1/arch/powerpc/platforms/cell/spufs/sched.c === --- linux-2.6.20-rc1.orig/arch/powerpc/platforms/cell/spufs/sched.c 2007-01-18 16:43:14.324526032 -0600 +++ linux-2.6.20-rc1/arch/powerpc/platforms/cell/spufs/sched.c 2007-01-26 16:16:35.219668640 -0600 @@ -78,21 +78,46 @@ static BLOCKING_NOTIFIER_HEAD(spu_switch_notifier); -static void spu_switch_notify(struct spu *spu, struct spu_context *ctx) +void spu_switch_notify(struct spu *spu, struct spu_context *ctx) { blocking_notifier_call_chain(spu_switch_notifier, ctx ? ctx-object_id : 0, spu); } +static void notify_spus_active(void) +{ + int node; + /* Wake up the active spu_contexts. 
When the awakened processes + * sees their notify_active flag is set, they will call + * spu_switch_notify(); + */ + for (node = 0; node MAX_NUMNODES; node++) { + struct spu *spu; + mutex_lock(spu_prio-active_mutex[node]); +list_for_each_entry(spu, spu_prio-active_list[node], list) { + struct spu_context *ctx = spu-ctx; + spu-notify_active = 1; + wake_up_all(ctx-stop_wq); + } +mutex_unlock(spu_prio-active_mutex[node]); + } +} + int spu_switch_event_register(struct notifier_block * n) { - return blocking_notifier_chain_register(spu_switch_notifier, n); + int ret; + ret = blocking_notifier_chain_register(spu_switch_notifier, n); + if (!ret) + notify_spus_active(); + return ret; } +EXPORT_SYMBOL_GPL(spu_switch_event_register); int spu_switch_event_unregister(struct notifier_block * n) { return blocking_notifier_chain_unregister(spu_switch_notifier, n); } +EXPORT_SYMBOL_GPL(spu_switch_event_unregister); static inline void bind_context(struct spu *spu, struct spu_context *ctx) Index: linux-2.6.20-rc1/arch/powerpc/platforms/cell/spufs/spufs.h === --- linux-2.6.20-rc1.orig/arch/powerpc/platforms/cell/spufs/spufs.h 2007-01-18 16:43:14.340523600 -0600 +++ linux-2.6.20-rc1/arch/powerpc/platforms/cell/spufs/spufs.h 2007-01-26 16:26:49.733703448 -0600 @@ -180,6 +180,7 @@ int spu_activate(struct spu_context *ctx, u64 flags); void spu_deactivate(struct spu_context *ctx); void spu_yield(struct spu_context *ctx); +void spu_switch_notify(struct spu *spu, struct spu_context *ctx); int __init spu_sched_init(void); void __exit spu_sched_exit(void); Index: linux-2.6.20-rc1/arch/powerpc/platforms/cell/spufs/run.c === --- linux-2.6.20-rc1.orig/arch/powerpc/platforms/cell/spufs/run.c 2007-01-18 16:43:14.340523600 -0600 +++ linux-2.6.20-rc1/arch/powerpc/platforms/cell/spufs/run.c 2007-01-26 16:24:38.979744856 -0600 @@ -45,9 +45,10 @@ u64 pte_fault; *stat = ctx-ops-status_read(ctx); - if (ctx-state != SPU_STATE_RUNNABLE) - return 1; + spu = ctx-spu; + if (ctx-state != SPU_STATE_RUNNABLE || 
spu-notify_active) + return 1; pte_fault = spu-dsisr (MFC_DSISR_PTE_NOT_FOUND | MFC_DSISR_ACCESS_DENIED); return (!(*stat 0x1) || pte_fault || spu-class_0_pending) ? 1 : 0; @@ -305,6 +306,7 @@ u32 *npc, u32 *event) { int ret; + struct spu * spu; u32 status; if (down_interruptible(ctx-run_sema)) @@ -318,8 +320,16 @@ do { ret = spufs_wait(ctx-stop_wq, spu_stopped(ctx, status)); + spu = ctx-spu; if (unlikely(ret)) break; + if (unlikely(spu-notify_active)) { + spu-notify_active = 0; + if (!(status SPU_STATUS_STOPPED_BY_STOP)) { +spu_switch_notify(spu, ctx); +continue; + } + } if ((status SPU_STATUS_STOPPED_BY_STOP) (status SPU_STOP_STATUS_SHIFT == 0x2104)) { ret = spu_process_callback(ctx); Index: linux-2.6.20-rc1/include/asm-powerpc/spu.h === --- linux-2.6.20-rc1.orig/include/asm-powerpc/spu.h 2007-01-18 16:43:19.932605072 -0600 +++ linux-2.6.20-rc1/include/asm-powerpc/spu.h 2007-01-24 12:17:30.209676992 -0600 @@ -144,6 +144,7 @@ void* pdata; /* platform private data */ struct sys_device sysdev; + int notify_active; }; struct spu *spu_alloc(void);
[RFC, PATCH 4/4] Add support to OProfile for profiling Cell BE SPUs -- update
Subject: Add support to OProfile for profiling Cell BE SPUs From: Maynard Johnson [EMAIL PROTECTED] This patch updates the existing arch/powerpc/oprofile/op_model_cell.c to add in the SPU profiling capabilities. In addition, a 'cell' subdirectory was added to arch/powerpc/oprofile to hold Cell-specific SPU profiling code. Signed-off-by: Carl Love [EMAIL PROTECTED] Signed-off-by: Maynard Johnson [EMAIL PROTECTED] Index: linux-2.6.20-rc1/arch/powerpc/configs/cell_defconfig === --- linux-2.6.20-rc1.orig/arch/powerpc/configs/cell_defconfig 2007-01-18 16:43:14.230540320 -0600 +++ linux-2.6.20-rc1/arch/powerpc/configs/cell_defconfig 2007-01-29 10:32:03.386789608 -0600 @@ -1403,7 +1403,7 @@ # Instrumentation Support # CONFIG_PROFILING=y -CONFIG_OPROFILE=y +CONFIG_OPROFILE=m # CONFIG_KPROBES is not set # Index: linux-2.6.20-rc1/arch/powerpc/oprofile/cell/pr_util.h === --- /dev/null 1970-01-01 00:00:00.0 + +++ linux-2.6.20-rc1/arch/powerpc/oprofile/cell/pr_util.h 2007-01-29 10:32:03.388789304 -0600 @@ -0,0 +1,75 @@ + /* + * Cell Broadband Engine OProfile Support + * + * (C) Copyright IBM Corporation 2006 + * + * Author: Maynard Johnson [EMAIL PROTECTED] + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. 
+ */ + +#ifndef PR_UTIL_H +#define PR_UTIL_H + +#include linux/cpumask.h +#include linux/oprofile.h +#include asm/cell-pmu.h +#include asm/spu.h + +#define number_of_online_nodes(nodes) {\ +u32 cpu; u32 tmp; \ +nodes = 0; \ +for_each_online_cpu(cpu) { \ +tmp = cbe_cpu_to_node(cpu) + 1;\ +if (tmp nodes) \ +nodes++; \ + } \ +} + + +/* Defines used for sync_start */ +#define SKIP_GENERIC_SYNC 0 +#define SYNC_START_ERROR -1 +#define DO_GENERIC_SYNC 1 + +typedef struct vma_map +{ + struct vma_map *next; + unsigned int vma; + unsigned int size; + unsigned int offset; + unsigned int guard_ptr; + unsigned int guard_val; +} vma_map_t; + +/* The three functions below are for maintaining and accessing + * the vma-to-file offset map. + */ +vma_map_t * create_vma_map(const struct spu * spu, u64 objectid); +unsigned int vma_map_lookup(vma_map_t *map, unsigned int vma, + const struct spu * aSpu); +void vma_map_free(struct vma_map *map); + +/* + * Entry point for SPU profiling. + * cycles_reset is the SPU_CYCLES count value specified by the user. + */ +void start_spu_profiling(unsigned int cycles_reset); + +void stop_spu_profiling(void); + + +/* add the necessary profiling hooks */ +int spu_sync_start(void); + +/* remove the hooks */ +int spu_sync_stop(void); + +/* Record SPU program counter samples to the oprofile event buffer. 
*/ +void spu_sync_buffer(int spu_num, unsigned int * samples, + int num_samples); + +#endif// PR_UTIL_H Index: linux-2.6.20-rc1/arch/powerpc/oprofile/cell/spu_profiler.c === --- /dev/null 1970-01-01 00:00:00.0 + +++ linux-2.6.20-rc1/arch/powerpc/oprofile/cell/spu_profiler.c 2007-01-29 10:32:03.392788696 -0600 @@ -0,0 +1,204 @@ +/* + * Cell Broadband Engine OProfile Support + * + * (C) Copyright IBM Corporation 2006 + * + * Authors: Maynard Johnson [EMAIL PROTECTED] + * Carl Love [EMAIL PROTECTED] + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include linux/hrtimer.h +#include linux/smp.h +#include linux/slab.h +#include asm/cell-pmu.h +#include pr_util.h + +#define TRACE_ARRAY_SIZE 1024 +static u32 * samples; +static u32 * samples_per_node; + +static int spu_prof_running = 0; +static unsigned int profiling_interval = 0; + +extern int num_nodes; +extern unsigned int khzfreq; + +/* + * Oprofile setup functions + */ + +#define NUM_SPU_BITS_TRBUF 16 +#define SPUS_PER_TB_ENTRY 4 +#define SPUS_PER_NODE 8 + +/* + * Collect the SPU program counter samples from the trace buffer. + * The global variable usage is as follows: + *samples[total-spus][TRACE_ARRAY_SIZE] - array to store SPU PC samples + * Assumption, the array will be all zeros on entry. + *u32 samples_per_node[num_nodes] - array of how many valid samples per node + */ +static void cell_spu_pc_collection(void) +{ + int cpu; + int node; + int spu; + u32 trace_addr; +/* the trace buffer is 128 bits */ + u64 trace_buffer[2]; + u64
Re: [Cbe-oss-dev] [PATCH] Cell SPU task notification - repost of patch with updates
This patch makes all the changes suggested by Christopher, with the exception of the suggestion to use the sched_flags field versus a new member to the spu_context struct to signal the need to "notify already active". I don't have the sched_flags change in my 2.6.20-rc1 tree. I can send another patch later if/when the sched_flags changes appears in the kernel version we end up picking for final oprofile-spu development. Comments welcome. Thanks. -Maynard Subject: Enable SPU switch notification to detect currently active SPU tasks. From: Maynard Johnson <[EMAIL PROTECTED]> This patch adds to the capability of spu_switch_event_register so that the caller is also notified of currently active SPU tasks. It also exports spu_switch_event_register and spu_switch_event_unregister. Signed-off-by: Maynard Johnson <[EMAIL PROTECTED]> Index: linux-2.6.20-rc1/arch/powerpc/platforms/cell/spufs/sched.c === --- linux-2.6.20-rc1.orig/arch/powerpc/platforms/cell/spufs/sched.c 2007-01-18 16:43:14.324526032 -0600 +++ linux-2.6.20-rc1/arch/powerpc/platforms/cell/spufs/sched.c 2007-01-26 16:16:35.219668640 -0600 @@ -78,21 +78,46 @@ static BLOCKING_NOTIFIER_HEAD(spu_switch_notifier); -static void spu_switch_notify(struct spu *spu, struct spu_context *ctx) +void spu_switch_notify(struct spu *spu, struct spu_context *ctx) { blocking_notifier_call_chain(_switch_notifier, ctx ? ctx->object_id : 0, spu); } +static void notify_spus_active(void) +{ + int node; + /* Wake up the active spu_contexts. 
When the awakened processes + * sees their notify_active flag is set, they will call + * spu_switch_notify(); + */ + for (node = 0; node < MAX_NUMNODES; node++) { + struct spu *spu; + mutex_lock(_prio->active_mutex[node]); +list_for_each_entry(spu, _prio->active_list[node], list) { + struct spu_context *ctx = spu->ctx; + spu->notify_active = 1; + wake_up_all(>stop_wq); + } +mutex_unlock(_prio->active_mutex[node]); + } +} + int spu_switch_event_register(struct notifier_block * n) { - return blocking_notifier_chain_register(_switch_notifier, n); + int ret; + ret = blocking_notifier_chain_register(_switch_notifier, n); + if (!ret) + notify_spus_active(); + return ret; } +EXPORT_SYMBOL_GPL(spu_switch_event_register); int spu_switch_event_unregister(struct notifier_block * n) { return blocking_notifier_chain_unregister(_switch_notifier, n); } +EXPORT_SYMBOL_GPL(spu_switch_event_unregister); static inline void bind_context(struct spu *spu, struct spu_context *ctx) Index: linux-2.6.20-rc1/arch/powerpc/platforms/cell/spufs/spufs.h === --- linux-2.6.20-rc1.orig/arch/powerpc/platforms/cell/spufs/spufs.h 2007-01-18 16:43:14.340523600 -0600 +++ linux-2.6.20-rc1/arch/powerpc/platforms/cell/spufs/spufs.h 2007-01-26 16:26:49.733703448 -0600 @@ -180,6 +180,7 @@ int spu_activate(struct spu_context *ctx, u64 flags); void spu_deactivate(struct spu_context *ctx); void spu_yield(struct spu_context *ctx); +void spu_switch_notify(struct spu *spu, struct spu_context *ctx); int __init spu_sched_init(void); void __exit spu_sched_exit(void); Index: linux-2.6.20-rc1/arch/powerpc/platforms/cell/spufs/run.c === --- linux-2.6.20-rc1.orig/arch/powerpc/platforms/cell/spufs/run.c 2007-01-18 16:43:14.340523600 -0600 +++ linux-2.6.20-rc1/arch/powerpc/platforms/cell/spufs/run.c 2007-01-26 16:24:38.979744856 -0600 @@ -45,9 +45,10 @@ u64 pte_fault; *stat = ctx->ops->status_read(ctx); - if (ctx->state != SPU_STATE_RUNNABLE) - return 1; + spu = ctx->spu; + if (ctx->state != SPU_STATE_RUNNABLE || 
spu->notify_active) + return 1; pte_fault = spu->dsisr & (MFC_DSISR_PTE_NOT_FOUND | MFC_DSISR_ACCESS_DENIED); return (!(*stat & 0x1) || pte_fault || spu->class_0_pending) ? 1 : 0; @@ -305,6 +306,7 @@ u32 *npc, u32 *event) { int ret; + struct spu * spu; u32 status; if (down_interruptible(>run_sema)) @@ -318,8 +320,16 @@ do { ret = spufs_wait(ctx->stop_wq, spu_stopped(ctx, )); + spu = ctx->spu; if (unlikely(ret)) break; + if (unlikely(spu->notify_active)) { + spu->notify_active = 0; + if (!(status & SPU_STATUS_STOPPED_BY_STOP)) { +spu_switch_notify(spu, ctx); +continue; + } + } if ((status & SPU_STATUS_STOPPED_BY_STOP) && (status >> SPU_STOP_STATUS_SHIFT == 0x2104)) { ret = spu_process_callback(ctx); Index: linux-2.6.20-rc1/include/asm-powerpc/spu.h === --- linux-2.6.20-rc1.orig/include/asm-powerpc/spu.h 2007-01-18 16:43:19.932605072 -0600 +++ linux-2.6.20-rc1/include/asm-powerpc/spu.h 2007-01-24 12:17:30.209676992 -0600 @@ -144,6 +144,7
Re: [Cbe-oss-dev] [PATCH] Cell SPU task notification - repost of patch with updates
This patch makes all the changes suggested by Christopher, with the exception of the suggestion to use the sched_flags field versus a new member to the spu_context struct to signal the need to notify already active. I don't have the sched_flags change in my 2.6.20-rc1 tree. I can send another patch later if/when the sched_flags changes appears in the kernel version we end up picking for final oprofile-spu development. Comments welcome. Thanks. -Maynard Subject: Enable SPU switch notification to detect currently active SPU tasks. From: Maynard Johnson [EMAIL PROTECTED] This patch adds to the capability of spu_switch_event_register so that the caller is also notified of currently active SPU tasks. It also exports spu_switch_event_register and spu_switch_event_unregister. Signed-off-by: Maynard Johnson [EMAIL PROTECTED] Index: linux-2.6.20-rc1/arch/powerpc/platforms/cell/spufs/sched.c === --- linux-2.6.20-rc1.orig/arch/powerpc/platforms/cell/spufs/sched.c 2007-01-18 16:43:14.324526032 -0600 +++ linux-2.6.20-rc1/arch/powerpc/platforms/cell/spufs/sched.c 2007-01-26 16:16:35.219668640 -0600 @@ -78,21 +78,46 @@ static BLOCKING_NOTIFIER_HEAD(spu_switch_notifier); -static void spu_switch_notify(struct spu *spu, struct spu_context *ctx) +void spu_switch_notify(struct spu *spu, struct spu_context *ctx) { blocking_notifier_call_chain(spu_switch_notifier, ctx ? ctx-object_id : 0, spu); } +static void notify_spus_active(void) +{ + int node; + /* Wake up the active spu_contexts. 
When the awakened processes + * sees their notify_active flag is set, they will call + * spu_switch_notify(); + */ + for (node = 0; node MAX_NUMNODES; node++) { + struct spu *spu; + mutex_lock(spu_prio-active_mutex[node]); +list_for_each_entry(spu, spu_prio-active_list[node], list) { + struct spu_context *ctx = spu-ctx; + spu-notify_active = 1; + wake_up_all(ctx-stop_wq); + } +mutex_unlock(spu_prio-active_mutex[node]); + } +} + int spu_switch_event_register(struct notifier_block * n) { - return blocking_notifier_chain_register(spu_switch_notifier, n); + int ret; + ret = blocking_notifier_chain_register(spu_switch_notifier, n); + if (!ret) + notify_spus_active(); + return ret; } +EXPORT_SYMBOL_GPL(spu_switch_event_register); int spu_switch_event_unregister(struct notifier_block * n) { return blocking_notifier_chain_unregister(spu_switch_notifier, n); } +EXPORT_SYMBOL_GPL(spu_switch_event_unregister); static inline void bind_context(struct spu *spu, struct spu_context *ctx) Index: linux-2.6.20-rc1/arch/powerpc/platforms/cell/spufs/spufs.h === --- linux-2.6.20-rc1.orig/arch/powerpc/platforms/cell/spufs/spufs.h 2007-01-18 16:43:14.340523600 -0600 +++ linux-2.6.20-rc1/arch/powerpc/platforms/cell/spufs/spufs.h 2007-01-26 16:26:49.733703448 -0600 @@ -180,6 +180,7 @@ int spu_activate(struct spu_context *ctx, u64 flags); void spu_deactivate(struct spu_context *ctx); void spu_yield(struct spu_context *ctx); +void spu_switch_notify(struct spu *spu, struct spu_context *ctx); int __init spu_sched_init(void); void __exit spu_sched_exit(void); Index: linux-2.6.20-rc1/arch/powerpc/platforms/cell/spufs/run.c === --- linux-2.6.20-rc1.orig/arch/powerpc/platforms/cell/spufs/run.c 2007-01-18 16:43:14.340523600 -0600 +++ linux-2.6.20-rc1/arch/powerpc/platforms/cell/spufs/run.c 2007-01-26 16:24:38.979744856 -0600 @@ -45,9 +45,10 @@ u64 pte_fault; *stat = ctx-ops-status_read(ctx); - if (ctx-state != SPU_STATE_RUNNABLE) - return 1; + spu = ctx-spu; + if (ctx-state != SPU_STATE_RUNNABLE || 
spu->notify_active) + return 1; pte_fault = spu->dsisr & (MFC_DSISR_PTE_NOT_FOUND | MFC_DSISR_ACCESS_DENIED); return (!(*stat & 0x1) || pte_fault || spu->class_0_pending) ? 1 : 0; @@ -305,6 +306,7 @@ u32 *npc, u32 *event) { int ret; + struct spu * spu; u32 status; if (down_interruptible(&ctx->run_sema)) @@ -318,8 +320,16 @@ do { ret = spufs_wait(ctx->stop_wq, spu_stopped(ctx, &status)); + spu = ctx->spu; if (unlikely(ret)) break; + if (unlikely(spu->notify_active)) { + spu->notify_active = 0; + if (!(status & SPU_STATUS_STOPPED_BY_STOP)) { +spu_switch_notify(spu, ctx); +continue; + } + } if ((status & SPU_STATUS_STOPPED_BY_STOP) && (status >> SPU_STOP_STATUS_SHIFT == 0x2104)) { ret = spu_process_callback(ctx); Index: linux-2.6.20-rc1/include/asm-powerpc/spu.h === --- linux-2.6.20-rc1.orig/include/asm-powerpc/spu.h 2007-01-18 16:43:19.932605072 -0600 +++ linux-2.6.20-rc1/include/asm-powerpc/spu.h 2007-01-24 12:17:30.209676992 -0600 @@ -144,6 +144,7 @@ void* pdata; /* platform private data */ struct sys_device sysdev; + int notify_active; }; struct spu
Re: [Cbe-oss-dev] [PATCH] Cell SPU task notification
Christoph Hellwig wrote: Index: linux-2.6.19-rc6-arnd1+patches/arch/powerpc/platforms/cell/spufs/sched.c === --- linux-2.6.19-rc6-arnd1+patches.orig/arch/powerpc/platforms/cell/spufs/sched.c 2006-12-04 10:56:04.730698720 -0600 +++ linux-2.6.19-rc6-arnd1+patches/arch/powerpc/platforms/cell/spufs/sched.c 2007-01-15 16:22:31.808461448 -0600 @@ -84,15 +84,42 @@ ctx ? ctx->object_id : 0, spu); } +static void notify_spus_active(void) +{ + int node; + /* Wake up the active spu_contexts. When the awakened processes +* sees their notify_active flag is set, they will call +* spu_notify_already_active(). +*/ + for (node = 0; node < MAX_NUMNODES; node++) { + struct spu *spu; + mutex_lock(_prio->active_mutex[node]); +list_for_each_entry(spu, _prio->active_list[node], list) { You seem to have some issues with tabs vs spaces for indentation here. fixed + struct spu_context *ctx = spu->ctx; + spu->notify_active = 1; Please make this a bit in the sched_flags field that's added in the scheduler patch series I sent out. I haven't seen that the scheduler patch series got applied yet. This Cell spu task notification patch is a pre-req for OProfile development to support profiling SPUs. When the scheduler patch gets applied to a kernel version that fits our needs for our OProfile development, I don't see any problem in using the sched_flags field instead of notify_active. + wake_up_all(>stop_wq); + smp_wmb(); + } +mutex_unlock(_prio->active_mutex[node]); + } + yield(); +} Why do you add the yield() here? yield is pretty much a sign for a bug Yes, the yield() and the memory barriers were leftovers from an earlier ill-conceived attempt at solving this problem. They should have been removed. They're gone now. 
+void spu_notify_already_active(struct spu_context *ctx) +{ + struct spu *spu = ctx->spu; + if (!spu) + return; + spu_switch_notify(spu, ctx); +} Please just call spu_switch_notify directly from the only I hesitated doing this since it would entail changing spu_switch_notify from being static to non-static. I'd like to get Arnd's opinion on this question before going ahead and making such a change. caller. Also the check for ctx->spu beeing there is not required if you look a the caller. *stat = ctx->ops->status_read(ctx); - if (ctx->state != SPU_STATE_RUNNABLE) - return 1; + smp_rmb(); What do you need the barrier for here? Removed. - To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [Cbe-oss-dev] [PATCH] Cell SPU task notification
Christoph Hellwig wrote: Index: linux-2.6.19-rc6-arnd1+patches/arch/powerpc/platforms/cell/spufs/sched.c === --- linux-2.6.19-rc6-arnd1+patches.orig/arch/powerpc/platforms/cell/spufs/sched.c 2006-12-04 10:56:04.730698720 -0600 +++ linux-2.6.19-rc6-arnd1+patches/arch/powerpc/platforms/cell/spufs/sched.c 2007-01-15 16:22:31.808461448 -0600 @@ -84,15 +84,42 @@ ctx ? ctx-object_id : 0, spu); } +static void notify_spus_active(void) +{ + int node; + /* Wake up the active spu_contexts. When the awakened processes +* sees their notify_active flag is set, they will call +* spu_notify_already_active(). +*/ + for (node = 0; node MAX_NUMNODES; node++) { + struct spu *spu; + mutex_lock(spu_prio-active_mutex[node]); +list_for_each_entry(spu, spu_prio-active_list[node], list) { You seem to have some issues with tabs vs spaces for indentation here. fixed + struct spu_context *ctx = spu-ctx; + spu-notify_active = 1; Please make this a bit in the sched_flags field that's added in the scheduler patch series I sent out. I haven't seen that the scheduler patch series got applied yet. This Cell spu task notification patch is a pre-req for OProfile development to support profiling SPUs. When the scheduler patch gets applied to a kernel version that fits our needs for our OProfile development, I don't see any problem in using the sched_flags field instead of notify_active. + wake_up_all(ctx-stop_wq); + smp_wmb(); + } +mutex_unlock(spu_prio-active_mutex[node]); + } + yield(); +} Why do you add the yield() here? yield is pretty much a sign for a bug Yes, the yield() and the memory barriers were leftovers from an earlier ill-conceived attempt at solving this problem. They should have been removed. They're gone now. 
+void spu_notify_already_active(struct spu_context *ctx) +{ + struct spu *spu = ctx-spu; + if (!spu) + return; + spu_switch_notify(spu, ctx); +} Please just call spu_switch_notify directly from the only I hesitated doing this since it would entail changing spu_switch_notify from being static to non-static. I'd like to get Arnd's opinion on this question before going ahead and making such a change. caller. Also the check for ctx-spu beeing there is not required if you look a the caller. *stat = ctx-ops-status_read(ctx); - if (ctx-state != SPU_STATE_RUNNABLE) - return 1; + smp_rmb(); What do you need the barrier for here? Removed. - To unsubscribe from this list: send the line unsubscribe linux-kernel in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH] Cell SPU task notification -- updated patch: #1
Attached is an updated patch that addresses Michael Ellerman's comments. One comment made by Michael has not yet been addressed: The comment was in regard to the for-loop in spufs/sched.c:notify_spus_active(). He wondered if the scheduler can swap a context from one node to another. If so, there's a small window in this loop (where we switch the lock from one node's active list to the next) where it may be possible we might miss waking up a context and send a spurious wakeup to another. Arnd . . . can you comment on this question? Thanks. -Maynard Subject: Enable SPU switch notification to detect currently active SPU tasks. From: Maynard Johnson <[EMAIL PROTECTED]> This patch adds to the capability of spu_switch_event_register so that the caller is also notified of currently active SPU tasks. It also exports spu_switch_event_register and spu_switch_event_unregister. Signed-off-by: Maynard Johnson <[EMAIL PROTECTED]> Index: linux-2.6.19-rc6-arnd1+patches/arch/powerpc/platforms/cell/spufs/sched.c === --- linux-2.6.19-rc6-arnd1+patches.orig/arch/powerpc/platforms/cell/spufs/sched.c 2006-12-04 10:56:04.730698720 -0600 +++ linux-2.6.19-rc6-arnd1+patches/arch/powerpc/platforms/cell/spufs/sched.c 2007-01-15 16:22:31.808461448 -0600 @@ -84,15 +84,42 @@ ctx ? ctx->object_id : 0, spu); } +static void notify_spus_active(void) +{ + int node; + /* Wake up the active spu_contexts. When the awakened processes + * sees their notify_active flag is set, they will call + * spu_notify_already_active(). 
+ */ + for (node = 0; node < MAX_NUMNODES; node++) { + struct spu *spu; + mutex_lock(_prio->active_mutex[node]); +list_for_each_entry(spu, _prio->active_list[node], list) { + struct spu_context *ctx = spu->ctx; + spu->notify_active = 1; + wake_up_all(>stop_wq); + smp_wmb(); + } +mutex_unlock(_prio->active_mutex[node]); + } + yield(); +} + int spu_switch_event_register(struct notifier_block * n) { - return blocking_notifier_chain_register(_switch_notifier, n); + int ret; + ret = blocking_notifier_chain_register(_switch_notifier, n); + if (!ret) + notify_spus_active(); + return ret; } +EXPORT_SYMBOL_GPL(spu_switch_event_register); int spu_switch_event_unregister(struct notifier_block * n) { return blocking_notifier_chain_unregister(_switch_notifier, n); } +EXPORT_SYMBOL_GPL(spu_switch_event_unregister); static inline void bind_context(struct spu *spu, struct spu_context *ctx) @@ -250,6 +277,14 @@ return spu_get_idle(ctx, flags); } +void spu_notify_already_active(struct spu_context *ctx) +{ + struct spu *spu = ctx->spu; + if (!spu) + return; + spu_switch_notify(spu, ctx); +} + /* The three externally callable interfaces * for the scheduler begin here. 
* Index: linux-2.6.19-rc6-arnd1+patches/arch/powerpc/platforms/cell/spufs/spufs.h === --- linux-2.6.19-rc6-arnd1+patches.orig/arch/powerpc/platforms/cell/spufs/spufs.h 2007-01-08 18:18:40.093354608 -0600 +++ linux-2.6.19-rc6-arnd1+patches/arch/powerpc/platforms/cell/spufs/spufs.h 2007-01-08 18:31:03.610345792 -0600 @@ -183,6 +183,7 @@ void spu_yield(struct spu_context *ctx); int __init spu_sched_init(void); void __exit spu_sched_exit(void); +void spu_notify_already_active(struct spu_context *ctx); extern char *isolated_loader; Index: linux-2.6.19-rc6-arnd1+patches/arch/powerpc/platforms/cell/spufs/run.c === --- linux-2.6.19-rc6-arnd1+patches.orig/arch/powerpc/platforms/cell/spufs/run.c 2007-01-08 18:33:51.979311680 -0600 +++ linux-2.6.19-rc6-arnd1+patches/arch/powerpc/platforms/cell/spufs/run.c 2007-01-15 16:31:30.10442 -0600 @@ -45,9 +45,11 @@ u64 pte_fault; *stat = ctx->ops->status_read(ctx); - if (ctx->state != SPU_STATE_RUNNABLE) - return 1; + smp_rmb(); + spu = ctx->spu; + if (ctx->state != SPU_STATE_RUNNABLE || spu->notify_active) + return 1; pte_fault = spu->dsisr & (MFC_DSISR_PTE_NOT_FOUND | MFC_DSISR_ACCESS_DENIED); return (!(*stat & 0x1) || pte_fault || spu->class_0_pending) ? 1 : 0; @@ -304,6 +306,7 @@ u32 *npc, u32 *event) { int ret; + struct * spu; u32 status; if (down_interruptible(>run_sema)) @@ -317,8 +320,16 @@ do { ret = spufs_wait(ctx->stop_wq, spu_stopped(ctx, )); + spu = ctx->spu; if (unlikely(ret)) break; + if (unlikely(spu->notify_active)) { + spu->notify_active = 0; + if (!(status & SPU_STATUS_STOPPED_BY_STOP)) { +spu_notify_already_active(ctx); +continue; + } + } if ((status & SPU_STATUS_STOPPED_BY_STOP) && (status >> SPU_STOP_STATUS_SHIFT == 0x2104)) { ret = spu_process_callback(ctx);
Re: [PATCH] Cell SPU task notification
Michael, Thanks for your comments! My responses are below. -Maynard Michael Ellerman wrote: Subject: Enable SPU switch notification to detect currently active SPU tasks. From: Maynard Johnson <[EMAIL PROTECTED]> This patch adds to the capability of spu_switch_event_register so that the caller is also notified of currently active SPU tasks. It also exports spu_switch_event_register and spu_switch_event_unregister. Hi Maynard, It'd be really good if you could convince your mailer to send patches inline :) Mozilla Mail is my client, and I don't see any option to force patches inline. Of course, there is an option to display received attachments as inline, but that doesn't help you. Index: linux-2.6.19-rc6-arnd1+patches/arch/powerpc/platforms/cell/spufs/sched.c === --- linux-2.6.19-rc6-arnd1+patches.orig/arch/powerpc/platforms/cell/spufs/sched.c 2006-12-04 10:56:04.730698720 -0600 +++ linux-2.6.19-rc6-arnd1+patches/arch/powerpc/platforms/cell/spufs/sched.c 2007-01-11 09:45:37.918333128 -0600 @@ -46,6 +46,8 @@ #define SPU_MIN_TIMESLICE (100 * HZ / 1000) +int notify_active[MAX_NUMNODES]; DOH! Right, this is patently wrong. Somehow, I misunderstood what MAX_NUMNODES was. I'll fix it. You're basing the size of the array on MAX_NUMNODES (1 << CONFIG_NODES_SHIFT), but then indexing it by spu->number. It's quite possible we'll have a system with MAX_NUMNODES == 1, but > 1 spus, in which case this code is going to break. The PS3 is one such system. Instead I think you should have a flag in the spu struct. Yes, that's certainly an option I'll look at. #define SPU_BITMAP_SIZE (((MAX_PRIO+BITS_PER_LONG)/BITS_PER_LONG)+1) struct spu_prio_array { unsigned long bitmap[SPU_BITMAP_SIZE]; @@ -81,18 +83,45 @@ static void spu_switch_notify(struct spu *spu, struct spu_context *ctx) { blocking_notifier_call_chain(_switch_notifier, - ctx ? ctx->object_id : 0, spu); +ctx ? ctx->object_id : 0, spu); +} Try not to make whitespace only changes in the same patch as actual code changes. 
+ +static void notify_spus_active(void) +{ + int node; + /* Wake up the active spu_contexts. When the awakened processes + * sees their notify_active flag is set, they will call +* spu_notify_already_active(). +*/ + for (node = 0; node < MAX_NUMNODES; node++) { + struct spu *spu; + mutex_lock(_prio->active_mutex[node]); +list_for_each_entry(spu, _prio->active_list[node], list) { + struct spu_context *ctx = spu->ctx; + wake_up_all(>stop_wq); + notify_active[ctx->spu->number] = 1; + smp_mb(); + } I don't understand why you're setting the notify flag after you do the wake_up_all() ? Right, I'll move it. You only need a smp_wmb() here. OK. Does the scheduler guarantee that ctxs won't swap nodes? Otherwise Don't know. Arnd would probably know off the top of his head. between releasing the lock on one node and getting the lock on the next, a ctx could migrate between them - which would cause either spurious wake ups, or missing a ctx altogether. Although I'm not sure if it's that important. How important is it? If the SPUs were executing different code or taking different paths through the same code, then seeing the samples for each SPU is important. But if this were the case, the user would easily see the misssing data for the missing SPU(s). Since this sounds like a very small window, re-running the profile would _probably_ yield a complete profile. On the other hand, if we can avoid it, we should. When I repost the fixed-up patch, I'll add a comment/question for Arnd about this issue. 
+mutex_unlock(_prio->active_mutex[node]); + } + yield(); } int spu_switch_event_register(struct notifier_block * n) { - return blocking_notifier_chain_register(_switch_notifier, n); + int ret; + ret = blocking_notifier_chain_register(_switch_notifier, n); + if (!ret) + notify_spus_active(); + return ret; } +EXPORT_SYMBOL_GPL(spu_switch_event_register); int spu_switch_event_unregister(struct notifier_block * n) { return blocking_notifier_chain_unregister(_switch_notifier, n); } +EXPORT_SYMBOL_GPL(spu_switch_event_unregister); static inline void bind_context(struct spu *spu, struct spu_context *ctx) @@ -250,6 +279,14 @@ return spu_get_idle(ctx, flags); } +void spu_notify_already_active(struct spu_context *ctx) +{ + struct spu *spu = ctx->spu; + if (!spu) + return; + spu_switch_notify(spu, ctx); +} + /* The three externally callable interfaces * for the scheduler begin here. * Index: linux-2.6.19-rc6-arnd
Re: [PATCH] Cell SPU task notification
Michael, Thanks for your comments! My responses are below. -Maynard Michael Ellerman wrote: Subject: Enable SPU switch notification to detect currently active SPU tasks. From: Maynard Johnson [EMAIL PROTECTED] This patch adds to the capability of spu_switch_event_register so that the caller is also notified of currently active SPU tasks. It also exports spu_switch_event_register and spu_switch_event_unregister. Hi Maynard, It'd be really good if you could convince your mailer to send patches inline :) Mozilla Mail is my client, and I don't see any option to force patches inline. Of course, there is an option to display received attachments as inline, but that doesn't help you. Index: linux-2.6.19-rc6-arnd1+patches/arch/powerpc/platforms/cell/spufs/sched.c === --- linux-2.6.19-rc6-arnd1+patches.orig/arch/powerpc/platforms/cell/spufs/sched.c 2006-12-04 10:56:04.730698720 -0600 +++ linux-2.6.19-rc6-arnd1+patches/arch/powerpc/platforms/cell/spufs/sched.c 2007-01-11 09:45:37.918333128 -0600 @@ -46,6 +46,8 @@ #define SPU_MIN_TIMESLICE (100 * HZ / 1000) +int notify_active[MAX_NUMNODES]; DOH! Right, this is patently wrong. Somehow, I misunderstood what MAX_NUMNODES was. I'll fix it. You're basing the size of the array on MAX_NUMNODES (1 CONFIG_NODES_SHIFT), but then indexing it by spu-number. It's quite possible we'll have a system with MAX_NUMNODES == 1, but 1 spus, in which case this code is going to break. The PS3 is one such system. Instead I think you should have a flag in the spu struct. Yes, that's certainly an option I'll look at. #define SPU_BITMAP_SIZE (((MAX_PRIO+BITS_PER_LONG)/BITS_PER_LONG)+1) struct spu_prio_array { unsigned long bitmap[SPU_BITMAP_SIZE]; @@ -81,18 +83,45 @@ static void spu_switch_notify(struct spu *spu, struct spu_context *ctx) { blocking_notifier_call_chain(spu_switch_notifier, - ctx ? ctx-object_id : 0, spu); +ctx ? ctx-object_id : 0, spu); +} Try not to make whitespace only changes in the same patch as actual code changes. 
+ +static void notify_spus_active(void) +{ + int node; + /* Wake up the active spu_contexts. When the awakened processes + * sees their notify_active flag is set, they will call +* spu_notify_already_active(). +*/ + for (node = 0; node MAX_NUMNODES; node++) { + struct spu *spu; + mutex_lock(spu_prio-active_mutex[node]); +list_for_each_entry(spu, spu_prio-active_list[node], list) { + struct spu_context *ctx = spu-ctx; + wake_up_all(ctx-stop_wq); + notify_active[ctx-spu-number] = 1; + smp_mb(); + } I don't understand why you're setting the notify flag after you do the wake_up_all() ? Right, I'll move it. You only need a smp_wmb() here. OK. Does the scheduler guarantee that ctxs won't swap nodes? Otherwise Don't know. Arnd would probably know off the top of his head. between releasing the lock on one node and getting the lock on the next, a ctx could migrate between them - which would cause either spurious wake ups, or missing a ctx altogether. Although I'm not sure if it's that important. How important is it? If the SPUs were executing different code or taking different paths through the same code, then seeing the samples for each SPU is important. But if this were the case, the user would easily see the misssing data for the missing SPU(s). Since this sounds like a very small window, re-running the profile would _probably_ yield a complete profile. On the other hand, if we can avoid it, we should. When I repost the fixed-up patch, I'll add a comment/question for Arnd about this issue. 
+mutex_unlock(spu_prio-active_mutex[node]); + } + yield(); } int spu_switch_event_register(struct notifier_block * n) { - return blocking_notifier_chain_register(spu_switch_notifier, n); + int ret; + ret = blocking_notifier_chain_register(spu_switch_notifier, n); + if (!ret) + notify_spus_active(); + return ret; } +EXPORT_SYMBOL_GPL(spu_switch_event_register); int spu_switch_event_unregister(struct notifier_block * n) { return blocking_notifier_chain_unregister(spu_switch_notifier, n); } +EXPORT_SYMBOL_GPL(spu_switch_event_unregister); static inline void bind_context(struct spu *spu, struct spu_context *ctx) @@ -250,6 +279,14 @@ return spu_get_idle(ctx, flags); } +void spu_notify_already_active(struct spu_context *ctx) +{ + struct spu *spu = ctx-spu; + if (!spu) + return; + spu_switch_notify(spu, ctx); +} + /* The three externally callable interfaces * for the scheduler begin here. * Index: linux-2.6.19-rc6-arnd1+patches/arch/powerpc/platforms/cell/spufs
Re: [PATCH] Cell SPU task notification -- updated patch: #1
Attached is an updated patch that addresses Michael Ellerman's comments. One comment made by Michael has not yet been addressed: The comment was in regard to the for-loop in spufs/sched.c:notify_spus_active(). He wondered if the scheduler can swap a context from one node to another. If so, there's a small window in this loop (where we switch the lock from one node's active list to the next) where it may be possible we might miss waking up a context and send a spurious wakeup to another. Arnd . . . can you comment on this question? Thanks. -Maynard Subject: Enable SPU switch notification to detect currently active SPU tasks. From: Maynard Johnson [EMAIL PROTECTED] This patch adds to the capability of spu_switch_event_register so that the caller is also notified of currently active SPU tasks. It also exports spu_switch_event_register and spu_switch_event_unregister. Signed-off-by: Maynard Johnson [EMAIL PROTECTED] Index: linux-2.6.19-rc6-arnd1+patches/arch/powerpc/platforms/cell/spufs/sched.c === --- linux-2.6.19-rc6-arnd1+patches.orig/arch/powerpc/platforms/cell/spufs/sched.c 2006-12-04 10:56:04.730698720 -0600 +++ linux-2.6.19-rc6-arnd1+patches/arch/powerpc/platforms/cell/spufs/sched.c 2007-01-15 16:22:31.808461448 -0600 @@ -84,15 +84,42 @@ ctx ? ctx-object_id : 0, spu); } +static void notify_spus_active(void) +{ + int node; + /* Wake up the active spu_contexts. When the awakened processes + * sees their notify_active flag is set, they will call + * spu_notify_already_active(). 
+ */ + for (node = 0; node MAX_NUMNODES; node++) { + struct spu *spu; + mutex_lock(spu_prio-active_mutex[node]); +list_for_each_entry(spu, spu_prio-active_list[node], list) { + struct spu_context *ctx = spu-ctx; + spu-notify_active = 1; + wake_up_all(ctx-stop_wq); + smp_wmb(); + } +mutex_unlock(spu_prio-active_mutex[node]); + } + yield(); +} + int spu_switch_event_register(struct notifier_block * n) { - return blocking_notifier_chain_register(spu_switch_notifier, n); + int ret; + ret = blocking_notifier_chain_register(spu_switch_notifier, n); + if (!ret) + notify_spus_active(); + return ret; } +EXPORT_SYMBOL_GPL(spu_switch_event_register); int spu_switch_event_unregister(struct notifier_block * n) { return blocking_notifier_chain_unregister(spu_switch_notifier, n); } +EXPORT_SYMBOL_GPL(spu_switch_event_unregister); static inline void bind_context(struct spu *spu, struct spu_context *ctx) @@ -250,6 +277,14 @@ return spu_get_idle(ctx, flags); } +void spu_notify_already_active(struct spu_context *ctx) +{ + struct spu *spu = ctx-spu; + if (!spu) + return; + spu_switch_notify(spu, ctx); +} + /* The three externally callable interfaces * for the scheduler begin here. 
* Index: linux-2.6.19-rc6-arnd1+patches/arch/powerpc/platforms/cell/spufs/spufs.h === --- linux-2.6.19-rc6-arnd1+patches.orig/arch/powerpc/platforms/cell/spufs/spufs.h 2007-01-08 18:18:40.093354608 -0600 +++ linux-2.6.19-rc6-arnd1+patches/arch/powerpc/platforms/cell/spufs/spufs.h 2007-01-08 18:31:03.610345792 -0600 @@ -183,6 +183,7 @@ void spu_yield(struct spu_context *ctx); int __init spu_sched_init(void); void __exit spu_sched_exit(void); +void spu_notify_already_active(struct spu_context *ctx); extern char *isolated_loader; Index: linux-2.6.19-rc6-arnd1+patches/arch/powerpc/platforms/cell/spufs/run.c === --- linux-2.6.19-rc6-arnd1+patches.orig/arch/powerpc/platforms/cell/spufs/run.c 2007-01-08 18:33:51.979311680 -0600 +++ linux-2.6.19-rc6-arnd1+patches/arch/powerpc/platforms/cell/spufs/run.c 2007-01-15 16:31:30.10442 -0600 @@ -45,9 +45,11 @@ u64 pte_fault; *stat = ctx-ops-status_read(ctx); - if (ctx-state != SPU_STATE_RUNNABLE) - return 1; + smp_rmb(); + spu = ctx-spu; + if (ctx-state != SPU_STATE_RUNNABLE || spu-notify_active) + return 1; pte_fault = spu-dsisr (MFC_DSISR_PTE_NOT_FOUND | MFC_DSISR_ACCESS_DENIED); return (!(*stat 0x1) || pte_fault || spu-class_0_pending) ? 1 : 0; @@ -304,6 +306,7 @@ u32 *npc, u32 *event) { int ret; + struct * spu; u32 status; if (down_interruptible(ctx-run_sema)) @@ -317,8 +320,16 @@ do { ret = spufs_wait(ctx-stop_wq, spu_stopped(ctx, status)); + spu = ctx-spu; if (unlikely(ret)) break; + if (unlikely(spu-notify_active)) { + spu-notify_active = 0; + if (!(status SPU_STATUS_STOPPED_BY_STOP)) { +spu_notify_already_active(ctx); +continue; + } + } if ((status SPU_STATUS_STOPPED_BY_STOP) (status SPU_STOP_STATUS_SHIFT == 0x2104)) { ret = spu_process_callback(ctx);
[PATCH] Cell SPU task notification
Subject: Enable SPU switch notification to detect currently active SPU tasks. From: Maynard Johnson <[EMAIL PROTECTED]> This patch adds to the capability of spu_switch_event_register so that the caller is also notified of currently active SPU tasks. It also exports spu_switch_event_register and spu_switch_event_unregister. Signed-off-by: Maynard Johnson <[EMAIL PROTECTED]> Index: linux-2.6.19-rc6-arnd1+patches/arch/powerpc/platforms/cell/spufs/sched.c === --- linux-2.6.19-rc6-arnd1+patches.orig/arch/powerpc/platforms/cell/spufs/sched.c 2006-12-04 10:56:04.730698720 -0600 +++ linux-2.6.19-rc6-arnd1+patches/arch/powerpc/platforms/cell/spufs/sched.c 2007-01-11 09:45:37.918333128 -0600 @@ -46,6 +46,8 @@ #define SPU_MIN_TIMESLICE (100 * HZ / 1000) +int notify_active[MAX_NUMNODES]; + #define SPU_BITMAP_SIZE (((MAX_PRIO+BITS_PER_LONG)/BITS_PER_LONG)+1) struct spu_prio_array { unsigned long bitmap[SPU_BITMAP_SIZE]; @@ -81,18 +83,45 @@ static void spu_switch_notify(struct spu *spu, struct spu_context *ctx) { blocking_notifier_call_chain(_switch_notifier, - ctx ? ctx->object_id : 0, spu); + ctx ? ctx->object_id : 0, spu); +} + +static void notify_spus_active(void) +{ + int node; + /* Wake up the active spu_contexts. When the awakened processes + * sees their notify_active flag is set, they will call + * spu_notify_already_active(). 
+ */ + for (node = 0; node < MAX_NUMNODES; node++) { + struct spu *spu; + mutex_lock(_prio->active_mutex[node]); +list_for_each_entry(spu, _prio->active_list[node], list) { + struct spu_context *ctx = spu->ctx; + wake_up_all(>stop_wq); + notify_active[ctx->spu->number] = 1; + smp_mb(); + } +mutex_unlock(_prio->active_mutex[node]); + } + yield(); } int spu_switch_event_register(struct notifier_block * n) { - return blocking_notifier_chain_register(_switch_notifier, n); + int ret; + ret = blocking_notifier_chain_register(_switch_notifier, n); + if (!ret) + notify_spus_active(); + return ret; } +EXPORT_SYMBOL_GPL(spu_switch_event_register); int spu_switch_event_unregister(struct notifier_block * n) { return blocking_notifier_chain_unregister(_switch_notifier, n); } +EXPORT_SYMBOL_GPL(spu_switch_event_unregister); static inline void bind_context(struct spu *spu, struct spu_context *ctx) @@ -250,6 +279,14 @@ return spu_get_idle(ctx, flags); } +void spu_notify_already_active(struct spu_context *ctx) +{ + struct spu *spu = ctx->spu; + if (!spu) + return; + spu_switch_notify(spu, ctx); +} + /* The three externally callable interfaces * for the scheduler begin here. 
* Index: linux-2.6.19-rc6-arnd1+patches/arch/powerpc/platforms/cell/spufs/spufs.h === --- linux-2.6.19-rc6-arnd1+patches.orig/arch/powerpc/platforms/cell/spufs/spufs.h 2007-01-08 18:18:40.093354608 -0600 +++ linux-2.6.19-rc6-arnd1+patches/arch/powerpc/platforms/cell/spufs/spufs.h 2007-01-08 18:31:03.610345792 -0600 @@ -183,6 +183,7 @@ void spu_yield(struct spu_context *ctx); int __init spu_sched_init(void); void __exit spu_sched_exit(void); +void spu_notify_already_active(struct spu_context *ctx); extern char *isolated_loader; Index: linux-2.6.19-rc6-arnd1+patches/arch/powerpc/platforms/cell/spufs/run.c === --- linux-2.6.19-rc6-arnd1+patches.orig/arch/powerpc/platforms/cell/spufs/run.c 2007-01-08 18:33:51.979311680 -0600 +++ linux-2.6.19-rc6-arnd1+patches/arch/powerpc/platforms/cell/spufs/run.c 2007-01-11 10:17:20.777344984 -0600 @@ -10,6 +10,8 @@ #include "spufs.h" +extern int notify_active[MAX_NUMNODES]; + /* interrupt-level stop callback function. */ void spufs_stop_callback(struct spu *spu) { @@ -45,7 +47,9 @@ u64 pte_fault; *stat = ctx->ops->status_read(ctx); - if (ctx->state != SPU_STATE_RUNNABLE) + smp_mb(); + + if (ctx->state != SPU_STATE_RUNNABLE || notify_active[ctx->spu->number]) return 1; spu = ctx->spu; pte_fault = spu->dsisr & @@ -319,6 +323,11 @@ ret = spufs_wait(ctx->stop_wq, spu_stopped(ctx, )); if (unlikely(ret)) break; + if (unlikely(notify_active[ctx->spu->number])) { + notify_active[ctx->spu->number] = 0; + if (!(status & SPU_STATUS_STOPPED_BY_STOP)) +spu_notify_already_active(ctx); + } if ((status & SPU_STATUS_STOPPED_BY_STOP) && (status >> SPU_STOP_STATUS_SHIFT == 0x2104)) { ret = spu_process_callback(ctx);
Re: [Cbe-oss-dev] [PATCH -- RFC]Add notification for active Cell SPU tasks -- update #2
I've reworked this patch to resolve the problem I was seeing. I will post the new patch in a separate, new posting with subject line of "[PATCH] Cell SPU task notification". -Maynard - To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [Cbe-oss-dev] [PATCH -- RFC]Add notification for active Cell SPU tasks -- update #2
I've reworked this patch to resolve the problem I was seeing. I will post the new patch in a separate, new posting with subject line of "[PATCH] Cell SPU task notification". -Maynard - To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCH] Cell SPU task notification
Subject: Enable SPU switch notification to detect currently active SPU tasks. From: Maynard Johnson [EMAIL PROTECTED] This patch adds to the capability of spu_switch_event_register so that the caller is also notified of currently active SPU tasks. It also exports spu_switch_event_register and spu_switch_event_unregister. Signed-off-by: Maynard Johnson [EMAIL PROTECTED] Index: linux-2.6.19-rc6-arnd1+patches/arch/powerpc/platforms/cell/spufs/sched.c === --- linux-2.6.19-rc6-arnd1+patches.orig/arch/powerpc/platforms/cell/spufs/sched.c 2006-12-04 10:56:04.730698720 -0600 +++ linux-2.6.19-rc6-arnd1+patches/arch/powerpc/platforms/cell/spufs/sched.c 2007-01-11 09:45:37.918333128 -0600 @@ -46,6 +46,8 @@ #define SPU_MIN_TIMESLICE (100 * HZ / 1000) +int notify_active[MAX_NUMNODES]; + #define SPU_BITMAP_SIZE (((MAX_PRIO+BITS_PER_LONG)/BITS_PER_LONG)+1) struct spu_prio_array { unsigned long bitmap[SPU_BITMAP_SIZE]; @@ -81,18 +83,45 @@ static void spu_switch_notify(struct spu *spu, struct spu_context *ctx) { blocking_notifier_call_chain(spu_switch_notifier, - ctx ? ctx-object_id : 0, spu); + ctx ? ctx-object_id : 0, spu); +} + +static void notify_spus_active(void) +{ + int node; + /* Wake up the active spu_contexts. When the awakened processes + * sees their notify_active flag is set, they will call + * spu_notify_already_active(). 
+ */ + for (node = 0; node MAX_NUMNODES; node++) { + struct spu *spu; + mutex_lock(spu_prio-active_mutex[node]); +list_for_each_entry(spu, spu_prio-active_list[node], list) { + struct spu_context *ctx = spu-ctx; + wake_up_all(ctx-stop_wq); + notify_active[ctx-spu-number] = 1; + smp_mb(); + } +mutex_unlock(spu_prio-active_mutex[node]); + } + yield(); } int spu_switch_event_register(struct notifier_block * n) { - return blocking_notifier_chain_register(spu_switch_notifier, n); + int ret; + ret = blocking_notifier_chain_register(spu_switch_notifier, n); + if (!ret) + notify_spus_active(); + return ret; } +EXPORT_SYMBOL_GPL(spu_switch_event_register); int spu_switch_event_unregister(struct notifier_block * n) { return blocking_notifier_chain_unregister(spu_switch_notifier, n); } +EXPORT_SYMBOL_GPL(spu_switch_event_unregister); static inline void bind_context(struct spu *spu, struct spu_context *ctx) @@ -250,6 +279,14 @@ return spu_get_idle(ctx, flags); } +void spu_notify_already_active(struct spu_context *ctx) +{ + struct spu *spu = ctx-spu; + if (!spu) + return; + spu_switch_notify(spu, ctx); +} + /* The three externally callable interfaces * for the scheduler begin here. 
* Index: linux-2.6.19-rc6-arnd1+patches/arch/powerpc/platforms/cell/spufs/spufs.h === --- linux-2.6.19-rc6-arnd1+patches.orig/arch/powerpc/platforms/cell/spufs/spufs.h 2007-01-08 18:18:40.093354608 -0600 +++ linux-2.6.19-rc6-arnd1+patches/arch/powerpc/platforms/cell/spufs/spufs.h 2007-01-08 18:31:03.610345792 -0600 @@ -183,6 +183,7 @@ void spu_yield(struct spu_context *ctx); int __init spu_sched_init(void); void __exit spu_sched_exit(void); +void spu_notify_already_active(struct spu_context *ctx); extern char *isolated_loader; Index: linux-2.6.19-rc6-arnd1+patches/arch/powerpc/platforms/cell/spufs/run.c === --- linux-2.6.19-rc6-arnd1+patches.orig/arch/powerpc/platforms/cell/spufs/run.c 2007-01-08 18:33:51.979311680 -0600 +++ linux-2.6.19-rc6-arnd1+patches/arch/powerpc/platforms/cell/spufs/run.c 2007-01-11 10:17:20.777344984 -0600 @@ -10,6 +10,8 @@ #include spufs.h +extern int notify_active[MAX_NUMNODES]; + /* interrupt-level stop callback function. */ void spufs_stop_callback(struct spu *spu) { @@ -45,7 +47,9 @@ u64 pte_fault; *stat = ctx-ops-status_read(ctx); - if (ctx-state != SPU_STATE_RUNNABLE) + smp_mb(); + + if (ctx-state != SPU_STATE_RUNNABLE || notify_active[ctx-spu-number]) return 1; spu = ctx-spu; pte_fault = spu-dsisr @@ -319,6 +323,11 @@ ret = spufs_wait(ctx-stop_wq, spu_stopped(ctx, status)); if (unlikely(ret)) break; + if (unlikely(notify_active[ctx-spu-number])) { + notify_active[ctx-spu-number] = 0; + if (!(status SPU_STATUS_STOPPED_BY_STOP)) +spu_notify_already_active(ctx); + } if ((status SPU_STATUS_STOPPED_BY_STOP) (status SPU_STOP_STATUS_SHIFT == 0x2104)) { ret = spu_process_callback(ctx);
Re: [Cbe-oss-dev] [PATCH -- RFC]Add notification for active Cell SPU tasks -- update #2
The attached patch adds notification of already-active SPU tasks to SPUFS. However, there's a bug that I'm having trouble finding. The first execution of this code path works fine, but subsequent runs do not work properly. On subsequent runs, I can see that not all of the already-active tasks are getting awakened, so my OProfile driver is not able to collect samples for those SPU contexts that were not awakened. If anyone can see the error in my logic or if you have any other comments on this patch, I would appreciate it. Thanks. Maynard Subject: Enable SPU switch notification to detect currently active SPU tasks. From: Maynard Johnson <[EMAIL PROTECTED]> This patch adds to the capability of spu_switch_event_register to notify the caller of currently active SPU tasks. It also exports spu_switch_event_register and spu_switch_event_unregister. Signed-off-by: Maynard Johnson <[EMAIL PROTECTED]> Index: linux-2.6.19-rc6-arnd1+patches/arch/powerpc/platforms/cell/spufs/sched.c === --- linux-2.6.19-rc6-arnd1+patches.orig/arch/powerpc/platforms/cell/spufs/sched.c 2006-12-04 10:56:04.730698720 -0600 +++ linux-2.6.19-rc6-arnd1+patches/arch/powerpc/platforms/cell/spufs/sched.c 2007-01-08 18:37:19.209355672 -0600 @@ -46,6 +46,10 @@ #define SPU_MIN_TIMESLICE (100 * HZ / 1000) +int notify_profiler; + +struct list_head active_spu_ctx_cache[MAX_NUMNODES]; + #define SPU_BITMAP_SIZE (((MAX_PRIO+BITS_PER_LONG)/BITS_PER_LONG)+1) struct spu_prio_array { unsigned long bitmap[SPU_BITMAP_SIZE]; @@ -81,19 +85,62 @@ static void spu_switch_notify(struct spu *spu, struct spu_context *ctx) { blocking_notifier_call_chain(_switch_notifier, - ctx ? ctx->object_id : 0, spu); + ctx ? ctx->object_id : 0, spu); +} + +static void notify_spus_active(void) +{ + int node; + /* Obtain and cache the active spu_contexts. 
*/ + for (node = 0; node < MAX_NUMNODES; node++) { + struct spu *spu; + mutex_lock(_prio->active_mutex[node]); + list_for_each_entry(spu, _prio->active_list[node], +list) { + struct spu_context *ctx = spu->ctx; + /* Make sure the context doesn't go away before we use it. */ + get_spu_context(ctx); + list_add(>active_cache_list, + _spu_ctx_cache[node]); + } + mutex_unlock(_prio->active_mutex[node]); + } + /* Now wake up the PPE tasks associated with the active spu contexts + * we found above. + */ + for (node = 0; node < MAX_NUMNODES; node++) { + struct spu_context *ctx, *tmp; + list_for_each_entry_safe(ctx, tmp, + _spu_ctx_cache[node], + active_cache_list) { + notify_profiler = 1; + wake_up_all(>stop_wq); + list_del(>active_cache_list); + put_spu_context(ctx); + set_current_state(TASK_RUNNING); + schedule(); + } + } } -int spu_switch_event_register(struct notifier_block * n) +int spu_switch_event_register(struct notifier_block *n) { - return blocking_notifier_chain_register(_switch_notifier, n); + int ret; + ret = blocking_notifier_chain_register(_switch_notifier, n); + if (!ret) + notify_spus_active(); + return ret; } -int spu_switch_event_unregister(struct notifier_block * n) +EXPORT_SYMBOL_GPL(spu_switch_event_register); + +int spu_switch_event_unregister(struct notifier_block *n) { return blocking_notifier_chain_unregister(_switch_notifier, n); } +EXPORT_SYMBOL_GPL(spu_switch_event_unregister); + static inline void bind_context(struct spu *spu, struct spu_context *ctx) { @@ -250,6 +297,14 @@ return spu_get_idle(ctx, flags); } +void spu_notify_already_active(struct spu_context *ctx) +{ + struct spu *spu = ctx->spu; + if (!spu) + return; + spu_switch_notify(spu, ctx); +} + /* The three externally callable interfaces * for the scheduler begin here. 
* @@ -345,6 +400,7 @@ for (i = 0; i < MAX_NUMNODES; i++) { mutex_init(_prio->active_mutex[i]); INIT_LIST_HEAD(_prio->active_list[i]); + INIT_LIST_HEAD(_spu_ctx_cache[i]); } return 0; } Index: linux-2.6.19-rc6-arnd1+patches/arch/powerpc/platforms/cell/spufs/spufs.h === --- linux-2.6.19-rc6-arnd1+patches.orig/arch/powerpc/platforms/cell/spufs/spufs.h 2007-01-08 18:18:40.093354608 -0600 +++ linux-2.6.19-rc6-arnd1+patches/arch/powerpc/platforms/cell/spufs/spufs.h 2007-01-08 18:31:03.610345792 -0600 @@ -183,6 +183,7 @@ void spu_yield(struct spu_context *ctx); int __init spu_sched_init(void); void __exit spu_sched_exit(void); +void spu_notify_already_active(struct spu_context *ctx); extern char *isolated_loader; Index: linux-2.6.19-rc6-arnd1+patches/arch/powerpc/platforms/cell/spufs/run.c === --- linux-2.6.19-rc6-arnd1+patches.orig/arch/powerpc/platforms/cell/spufs/run.c 2007-01-08 18:33:51.979311680 -0600 +++ linux-2.6.19-rc6-arnd1+patches/arch/powerpc/platforms/cell/spufs/run.c 2007-01-08 18:34:
Re: [Cbe-oss-dev] [PATCH -- RFC]Add notification for active Cell SPU tasks -- update #2
The attached patch adds notification of already-active SPU tasks to SPUFS. However, there's a bug that I'm having trouble finding. The first execution of this code path works fine, but subsequent runs do not work properly. On subsequent runs, I can see that not all of the already-active tasks are getting awakened, so my OProfile driver is not able to collect samples for those SPU contexts that were not awakened. If anyone can see the error in my logic or if you have any other comments on this patch, I would appreciate it. Thanks. Maynard Subject: Enable SPU switch notification to detect currently active SPU tasks. From: Maynard Johnson [EMAIL PROTECTED] This patch adds to the capability of spu_switch_event_register to notify the caller of currently active SPU tasks. It also exports spu_switch_event_register and spu_switch_event_unregister. Signed-off-by: Maynard Johnson [EMAIL PROTECTED] Index: linux-2.6.19-rc6-arnd1+patches/arch/powerpc/platforms/cell/spufs/sched.c === --- linux-2.6.19-rc6-arnd1+patches.orig/arch/powerpc/platforms/cell/spufs/sched.c 2006-12-04 10:56:04.730698720 -0600 +++ linux-2.6.19-rc6-arnd1+patches/arch/powerpc/platforms/cell/spufs/sched.c 2007-01-08 18:37:19.209355672 -0600 @@ -46,6 +46,10 @@ #define SPU_MIN_TIMESLICE (100 * HZ / 1000) +int notify_profiler; + +struct list_head active_spu_ctx_cache[MAX_NUMNODES]; + #define SPU_BITMAP_SIZE (((MAX_PRIO+BITS_PER_LONG)/BITS_PER_LONG)+1) struct spu_prio_array { unsigned long bitmap[SPU_BITMAP_SIZE]; @@ -81,19 +85,62 @@ static void spu_switch_notify(struct spu *spu, struct spu_context *ctx) { blocking_notifier_call_chain(spu_switch_notifier, - ctx ? ctx-object_id : 0, spu); + ctx ? ctx-object_id : 0, spu); +} + +static void notify_spus_active(void) +{ + int node; + /* Obtain and cache the active spu_contexts. 
*/ + for (node = 0; node MAX_NUMNODES; node++) { + struct spu *spu; + mutex_lock(spu_prio-active_mutex[node]); + list_for_each_entry(spu, spu_prio-active_list[node], +list) { + struct spu_context *ctx = spu-ctx; + /* Make sure the context doesn't go away before we use it. */ + get_spu_context(ctx); + list_add(ctx-active_cache_list, + active_spu_ctx_cache[node]); + } + mutex_unlock(spu_prio-active_mutex[node]); + } + /* Now wake up the PPE tasks associated with the active spu contexts + * we found above. + */ + for (node = 0; node MAX_NUMNODES; node++) { + struct spu_context *ctx, *tmp; + list_for_each_entry_safe(ctx, tmp, + active_spu_ctx_cache[node], + active_cache_list) { + notify_profiler = 1; + wake_up_all(ctx-stop_wq); + list_del(ctx-active_cache_list); + put_spu_context(ctx); + set_current_state(TASK_RUNNING); + schedule(); + } + } } -int spu_switch_event_register(struct notifier_block * n) +int spu_switch_event_register(struct notifier_block *n) { - return blocking_notifier_chain_register(spu_switch_notifier, n); + int ret; + ret = blocking_notifier_chain_register(spu_switch_notifier, n); + if (!ret) + notify_spus_active(); + return ret; } -int spu_switch_event_unregister(struct notifier_block * n) +EXPORT_SYMBOL_GPL(spu_switch_event_register); + +int spu_switch_event_unregister(struct notifier_block *n) { return blocking_notifier_chain_unregister(spu_switch_notifier, n); } +EXPORT_SYMBOL_GPL(spu_switch_event_unregister); + static inline void bind_context(struct spu *spu, struct spu_context *ctx) { @@ -250,6 +297,14 @@ return spu_get_idle(ctx, flags); } +void spu_notify_already_active(struct spu_context *ctx) +{ + struct spu *spu = ctx-spu; + if (!spu) + return; + spu_switch_notify(spu, ctx); +} + /* The three externally callable interfaces * for the scheduler begin here. 
* @@ -345,6 +400,7 @@ for (i = 0; i MAX_NUMNODES; i++) { mutex_init(spu_prio-active_mutex[i]); INIT_LIST_HEAD(spu_prio-active_list[i]); + INIT_LIST_HEAD(active_spu_ctx_cache[i]); } return 0; } Index: linux-2.6.19-rc6-arnd1+patches/arch/powerpc/platforms/cell/spufs/spufs.h === --- linux-2.6.19-rc6-arnd1+patches.orig/arch/powerpc/platforms/cell/spufs/spufs.h 2007-01-08 18:18:40.093354608 -0600 +++ linux-2.6.19-rc6-arnd1+patches/arch/powerpc/platforms/cell/spufs/spufs.h 2007-01-08 18:31:03.610345792 -0600 @@ -183,6 +183,7 @@ void spu_yield(struct spu_context *ctx); int __init spu_sched_init(void); void __exit spu_sched_exit(void); +void spu_notify_already_active(struct spu_context *ctx); extern char *isolated_loader; Index: linux-2.6.19-rc6-arnd1+patches/arch/powerpc/platforms/cell/spufs/run.c === --- linux-2.6.19-rc6-arnd1+patches.orig/arch/powerpc/platforms/cell/spufs/run.c 2007-01-08 18:33:51.979311680 -0600 +++ linux-2.6.19-rc6-arnd1+patches/arch/powerpc/platforms/cell/spufs/run.c 2007-01-08 18:34:09.260344944 -0600
Re: [PATCH -- RFC] Add support to OProfile for profiling Cell BE SPUs
Any comments on the attached patch would be appreciated. Thank you. -Maynard --- Maynard Johnson wrote: The attached patch extends OProfile's Cell support (committed into 2.6.20-rc1), adding the capability to do time-based profiling of the SPUs. This is a preliminary patch we're posting for comments. Development is not 100% complete yet, but very close. This patch is dependent on two outstanding patches that have not yet been committed to mainline: 1) the cleanup patch for the initial OProfile Cell PPU support (posted on Nov 27); and 2) the spu notifier patch (posted on Dec 1), which exports the [un]register functions in SPUFS. It is also dependent on an internal patch, which will be submitted once the cleanup patch is committed. So please don't try to apply this patch to a source tree yet. The code is functional and passing all test scenarios, except one: if an SPU task is already active before OProfile is started, the notification we receive for this already-active task happens in the wrong context, so we're unable to collect the information we need about the SPU binary being executed. The spu notifer patch mentioned above was meant to solve this problem, but in fact, it does not (which is why that patch hasn't been committed yet). We're currently investigating other options. All comments are welcome. NOTE: The availability of the developers of this patch is limited between now and Jan 2, 2007, so replies to comments may be delayed until then. Thanks. Maynard Johnson IBM LTC Toolchain ___ Linuxppc-dev mailing list [EMAIL PROTECTED] https://ozlabs.org/mailman/listinfo/linuxppc-dev Subject: Add support to OProfile for profiling Cell BE SPUs From: Maynard Johnson <[EMAIL PROTECTED]> This patch updates the existing arch/powerpc/oprofile/op_model_cell.c to add in the SPU profiling capabilities. In addition, a 'cell' subdirectory was added to arch/powerpc/oprofile to hold Cell-specific SPU profiling code. 
Signed-off-by: Carl Love <[EMAIL PROTECTED]> Signed-off-by: Maynard Johnson <[EMAIL PROTECTED]> Index: linux-2.6.19-rc6-arnd1+patches/arch/powerpc/configs/cell_defconfig === --- linux-2.6.19-rc6-arnd1+patches.orig/arch/powerpc/configs/cell_defconfig 2006-12-04 10:56:04.699570280 -0600 +++ linux-2.6.19-rc6-arnd1+patches/arch/powerpc/configs/cell_defconfig 2006-12-08 10:38:58.049707128 -0600 @@ -1136,7 +1136,7 @@ # Instrumentation Support # CONFIG_PROFILING=y -CONFIG_OPROFILE=y +CONFIG_OPROFILE=m # CONFIG_KPROBES is not set # Index: linux-2.6.19-rc6-arnd1+patches/arch/powerpc/oprofile/cell/pr_util.h === --- /dev/null 1970-01-01 00:00:00.0 + +++ linux-2.6.19-rc6-arnd1+patches/arch/powerpc/oprofile/cell/pr_util.h 2006-12-14 11:58:04.393836120 -0600 @@ -0,0 +1,74 @@ + /* + * Cell Broadband Engine OProfile Support + * + * (C) Copyright IBM Corporation 2006 + * + * Author: Maynard Johnson <[EMAIL PROTECTED]> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#ifndef PR_UTIL_H +#define PR_UTIL_H + +#include +#include +#include +#include + +#define number_of_online_nodes(nodes) \ +u32 cpu; u32 tmp; \ +nodes = 0; \ +for_each_online_cpu(cpu) { \ +tmp = cbe_cpu_to_node(cpu) + 1;\ +if (tmp > nodes) \ +nodes++; \ + } + + +/* Defines used for sync_start */ +#define SKIP_GENERIC_SYNC 0 +#define SYNC_START_ERROR -1 +#define DO_GENERIC_SYNC 1 + +typedef struct vma_map +{ + struct vma_map *next; + unsigned int vma; + unsigned int size; + unsigned int offset; + unsigned int guard_ptr; + unsigned int guard_val; +} vma_map_t; + +/* The three functions below are for maintaining and accessing + * the vma-to-file offset map. 
+ */ +vma_map_t * create_vma_map(const struct spu * spu, u64 objectid); +unsigned int vma_map_lookup(vma_map_t *map, unsigned int vma, + const struct spu * aSpu); +void vma_map_free(struct vma_map *map); + +/* + * Entry point for SPU profiling. + * cycles_reset is the SPU_CYCLES count value specified by the user. + */ +void start_spu_profiling(unsigned int cycles_reset); + +void stop_spu_profiling(void); + + +/* add the necessary profiling hooks */ +int spu_sync_start(void); + +/* remove the hooks */ +int spu_sync_stop(void); + +/* Record SPU program counter samples to the oprofile event buffer. */ +void spu_sync_buffer(int spu_num, unsigned int * samples, + int num_samples); + +#endif// PR_UTIL_H Index: linux-2.6.19-rc6-arnd1+pa
Re: [PATCH -- RFC] Add support to OProfile for profiling Cell BE SPUs
Any comments on the attached patch would be appreciated. Thank you. -Maynard --- Maynard Johnson wrote: The attached patch extends OProfile's Cell support (committed into 2.6.20-rc1), adding the capability to do time-based profiling of the SPUs. This is a preliminary patch we're posting for comments. Development is not 100% complete yet, but very close. This patch is dependent on two outstanding patches that have not yet been committed to mainline: 1) the cleanup patch for the initial OProfile Cell PPU support (posted on Nov 27); and 2) the spu notifier patch (posted on Dec 1), which exports the [un]register functions in SPUFS. It is also dependent on an internal patch, which will be submitted once the cleanup patch is committed. So please don't try to apply this patch to a source tree yet. The code is functional and passing all test scenarios, except one: if an SPU task is already active before OProfile is started, the notification we receive for this already-active task happens in the wrong context, so we're unable to collect the information we need about the SPU binary being executed. The spu notifer patch mentioned above was meant to solve this problem, but in fact, it does not (which is why that patch hasn't been committed yet). We're currently investigating other options. All comments are welcome. NOTE: The availability of the developers of this patch is limited between now and Jan 2, 2007, so replies to comments may be delayed until then. Thanks. Maynard Johnson IBM LTC Toolchain ___ Linuxppc-dev mailing list [EMAIL PROTECTED] https://ozlabs.org/mailman/listinfo/linuxppc-dev Subject: Add support to OProfile for profiling Cell BE SPUs From: Maynard Johnson [EMAIL PROTECTED] This patch updates the existing arch/powerpc/oprofile/op_model_cell.c to add in the SPU profiling capabilities. In addition, a 'cell' subdirectory was added to arch/powerpc/oprofile to hold Cell-specific SPU profiling code. 
Signed-off-by: Carl Love [EMAIL PROTECTED] Signed-off-by: Maynard Johnson [EMAIL PROTECTED] Index: linux-2.6.19-rc6-arnd1+patches/arch/powerpc/configs/cell_defconfig === --- linux-2.6.19-rc6-arnd1+patches.orig/arch/powerpc/configs/cell_defconfig 2006-12-04 10:56:04.699570280 -0600 +++ linux-2.6.19-rc6-arnd1+patches/arch/powerpc/configs/cell_defconfig 2006-12-08 10:38:58.049707128 -0600 @@ -1136,7 +1136,7 @@ # Instrumentation Support # CONFIG_PROFILING=y -CONFIG_OPROFILE=y +CONFIG_OPROFILE=m # CONFIG_KPROBES is not set # Index: linux-2.6.19-rc6-arnd1+patches/arch/powerpc/oprofile/cell/pr_util.h === --- /dev/null 1970-01-01 00:00:00.0 + +++ linux-2.6.19-rc6-arnd1+patches/arch/powerpc/oprofile/cell/pr_util.h 2006-12-14 11:58:04.393836120 -0600 @@ -0,0 +1,74 @@ + /* + * Cell Broadband Engine OProfile Support + * + * (C) Copyright IBM Corporation 2006 + * + * Author: Maynard Johnson [EMAIL PROTECTED] + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#ifndef PR_UTIL_H +#define PR_UTIL_H + +#include linux/cpumask.h +#include linux/oprofile.h +#include asm/cell-pmu.h +#include asm/spu.h + +#define number_of_online_nodes(nodes) \ +u32 cpu; u32 tmp; \ +nodes = 0; \ +for_each_online_cpu(cpu) { \ +tmp = cbe_cpu_to_node(cpu) + 1;\ +if (tmp nodes) \ +nodes++; \ + } + + +/* Defines used for sync_start */ +#define SKIP_GENERIC_SYNC 0 +#define SYNC_START_ERROR -1 +#define DO_GENERIC_SYNC 1 + +typedef struct vma_map +{ + struct vma_map *next; + unsigned int vma; + unsigned int size; + unsigned int offset; + unsigned int guard_ptr; + unsigned int guard_val; +} vma_map_t; + +/* The three functions below are for maintaining and accessing + * the vma-to-file offset map. 
+ */ +vma_map_t * create_vma_map(const struct spu * spu, u64 objectid); +unsigned int vma_map_lookup(vma_map_t *map, unsigned int vma, + const struct spu * aSpu); +void vma_map_free(struct vma_map *map); + +/* + * Entry point for SPU profiling. + * cycles_reset is the SPU_CYCLES count value specified by the user. + */ +void start_spu_profiling(unsigned int cycles_reset); + +void stop_spu_profiling(void); + + +/* add the necessary profiling hooks */ +int spu_sync_start(void); + +/* remove the hooks */ +int spu_sync_stop(void); + +/* Record SPU program counter samples to the oprofile event buffer. */ +void spu_sync_buffer(int spu_num, unsigned int * samples, + int num_samples); + +#endif// PR_UTIL_H Index: linux
[PATCH -- RFC] Add support to OProfile for profiling Cell BE SPUs
The attached patch extends OProfile's Cell support (committed into 2.6.20-rc1), adding the capability to do time-based profiling of the SPUs. This is a preliminary patch we're posting for comments. Development is not 100% complete yet, but very close. This patch is dependent on two outstanding patches that have not yet been committed to mainline: 1) the cleanup patch for the initial OProfile Cell PPU support (posted on Nov 27); and 2) the spu notifier patch (posted on Dec 1), which exports the [un]register functions in SPUFS. It is also dependent on an internal patch, which will be submitted once the cleanup patch is committed. So please don't try to apply this patch to a source tree yet. The code is functional and passing all test scenarios, except one: if an SPU task is already active before OProfile is started, the notification we receive for this already-active task happens in the wrong context, so we're unable to collect the information we need about the SPU binary being executed. The spu notifer patch mentioned above was meant to solve this problem, but in fact, it does not (which is why that patch hasn't been committed yet). We're currently investigating other options. All comments are welcome. NOTE: The availability of the developers of this patch is limited between now and Jan 2, 2007, so replies to comments may be delayed until then. Thanks. Maynard Johnson IBM LTC Toolchain Subject: Add support to OProfile for profiling Cell BE SPUs From: Maynard Johnson <[EMAIL PROTECTED]> This patch updates the existing arch/powerpc/oprofile/op_model_cell.c to add in the SPU profiling capabilities. In addition, a 'cell' subdirectory was added to arch/powerpc/oprofile to hold Cell-specific SPU profiling code. 
Signed-off-by: Carl Love <[EMAIL PROTECTED]> Signed-off-by: Maynard Johnson <[EMAIL PROTECTED]> Index: linux-2.6.19-rc6-arnd1+patches/arch/powerpc/configs/cell_defconfig === --- linux-2.6.19-rc6-arnd1+patches.orig/arch/powerpc/configs/cell_defconfig 2006-12-04 10:56:04.699570280 -0600 +++ linux-2.6.19-rc6-arnd1+patches/arch/powerpc/configs/cell_defconfig 2006-12-08 10:38:58.049707128 -0600 @@ -1136,7 +1136,7 @@ # Instrumentation Support # CONFIG_PROFILING=y -CONFIG_OPROFILE=y +CONFIG_OPROFILE=m # CONFIG_KPROBES is not set # Index: linux-2.6.19-rc6-arnd1+patches/arch/powerpc/oprofile/cell/pr_util.h === --- /dev/null 1970-01-01 00:00:00.0 + +++ linux-2.6.19-rc6-arnd1+patches/arch/powerpc/oprofile/cell/pr_util.h 2006-12-14 11:58:04.393836120 -0600 @@ -0,0 +1,74 @@ + /* + * Cell Broadband Engine OProfile Support + * + * (C) Copyright IBM Corporation 2006 + * + * Author: Maynard Johnson <[EMAIL PROTECTED]> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#ifndef PR_UTIL_H +#define PR_UTIL_H + +#include +#include +#include +#include + +#define number_of_online_nodes(nodes) \ +u32 cpu; u32 tmp; \ +nodes = 0; \ +for_each_online_cpu(cpu) { \ +tmp = cbe_cpu_to_node(cpu) + 1;\ +if (tmp > nodes) \ +nodes++; \ + } + + +/* Defines used for sync_start */ +#define SKIP_GENERIC_SYNC 0 +#define SYNC_START_ERROR -1 +#define DO_GENERIC_SYNC 1 + +typedef struct vma_map +{ + struct vma_map *next; + unsigned int vma; + unsigned int size; + unsigned int offset; + unsigned int guard_ptr; + unsigned int guard_val; +} vma_map_t; + +/* The three functions below are for maintaining and accessing + * the vma-to-file offset map. 
+ */ +vma_map_t * create_vma_map(const struct spu * spu, u64 objectid); +unsigned int vma_map_lookup(vma_map_t *map, unsigned int vma, + const struct spu * aSpu); +void vma_map_free(struct vma_map *map); + +/* + * Entry point for SPU profiling. + * cycles_reset is the SPU_CYCLES count value specified by the user. + */ +void start_spu_profiling(unsigned int cycles_reset); + +void stop_spu_profiling(void); + + +/* add the necessary profiling hooks */ +int spu_sync_start(void); + +/* remove the hooks */ +int spu_sync_stop(void); + +/* Record SPU program counter samples to the oprofile event buffer. */ +void spu_sync_buffer(int spu_num, unsigned int * samples, +int num_samples); + +#endif// PR_UTIL_H Index: linux-2.6.19-rc6-arnd1+patches/arch/powerpc/oprofile/cell/spu_profiler.c === --- /dev/null 1970-01-01 00:00:00.0 + +++ linux-2.
[PATCH -- RFC] Add support to OProfile for profiling Cell BE SPUs
The attached patch extends OProfile's Cell support (committed into 2.6.20-rc1), adding the capability to do time-based profiling of the SPUs. This is a preliminary patch we're posting for comments. Development is not 100% complete yet, but very close. This patch is dependent on two outstanding patches that have not yet been committed to mainline: 1) the cleanup patch for the initial OProfile Cell PPU support (posted on Nov 27); and 2) the spu notifier patch (posted on Dec 1), which exports the [un]register functions in SPUFS. It is also dependent on an internal patch, which will be submitted once the cleanup patch is committed. So please don't try to apply this patch to a source tree yet. The code is functional and passing all test scenarios, except one: if an SPU task is already active before OProfile is started, the notification we receive for this already-active task happens in the wrong context, so we're unable to collect the information we need about the SPU binary being executed. The spu notifer patch mentioned above was meant to solve this problem, but in fact, it does not (which is why that patch hasn't been committed yet). We're currently investigating other options. All comments are welcome. NOTE: The availability of the developers of this patch is limited between now and Jan 2, 2007, so replies to comments may be delayed until then. Thanks. Maynard Johnson IBM LTC Toolchain Subject: Add support to OProfile for profiling Cell BE SPUs From: Maynard Johnson [EMAIL PROTECTED] This patch updates the existing arch/powerpc/oprofile/op_model_cell.c to add in the SPU profiling capabilities. In addition, a 'cell' subdirectory was added to arch/powerpc/oprofile to hold Cell-specific SPU profiling code. 
Signed-off-by: Carl Love [EMAIL PROTECTED] Signed-off-by: Maynard Johnson [EMAIL PROTECTED] Index: linux-2.6.19-rc6-arnd1+patches/arch/powerpc/configs/cell_defconfig === --- linux-2.6.19-rc6-arnd1+patches.orig/arch/powerpc/configs/cell_defconfig 2006-12-04 10:56:04.699570280 -0600 +++ linux-2.6.19-rc6-arnd1+patches/arch/powerpc/configs/cell_defconfig 2006-12-08 10:38:58.049707128 -0600 @@ -1136,7 +1136,7 @@ # Instrumentation Support # CONFIG_PROFILING=y -CONFIG_OPROFILE=y +CONFIG_OPROFILE=m # CONFIG_KPROBES is not set # Index: linux-2.6.19-rc6-arnd1+patches/arch/powerpc/oprofile/cell/pr_util.h === --- /dev/null 1970-01-01 00:00:00.0 + +++ linux-2.6.19-rc6-arnd1+patches/arch/powerpc/oprofile/cell/pr_util.h 2006-12-14 11:58:04.393836120 -0600 @@ -0,0 +1,74 @@ + /* + * Cell Broadband Engine OProfile Support + * + * (C) Copyright IBM Corporation 2006 + * + * Author: Maynard Johnson [EMAIL PROTECTED] + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#ifndef PR_UTIL_H +#define PR_UTIL_H + +#include linux/cpumask.h +#include linux/oprofile.h +#include asm/cell-pmu.h +#include asm/spu.h + +#define number_of_online_nodes(nodes) \ +u32 cpu; u32 tmp; \ +nodes = 0; \ +for_each_online_cpu(cpu) { \ +tmp = cbe_cpu_to_node(cpu) + 1;\ +if (tmp nodes) \ +nodes++; \ + } + + +/* Defines used for sync_start */ +#define SKIP_GENERIC_SYNC 0 +#define SYNC_START_ERROR -1 +#define DO_GENERIC_SYNC 1 + +typedef struct vma_map +{ + struct vma_map *next; + unsigned int vma; + unsigned int size; + unsigned int offset; + unsigned int guard_ptr; + unsigned int guard_val; +} vma_map_t; + +/* The three functions below are for maintaining and accessing + * the vma-to-file offset map. 
+ */ +vma_map_t * create_vma_map(const struct spu * spu, u64 objectid); +unsigned int vma_map_lookup(vma_map_t *map, unsigned int vma, + const struct spu * aSpu); +void vma_map_free(struct vma_map *map); + +/* + * Entry point for SPU profiling. + * cycles_reset is the SPU_CYCLES count value specified by the user. + */ +void start_spu_profiling(unsigned int cycles_reset); + +void stop_spu_profiling(void); + + +/* add the necessary profiling hooks */ +int spu_sync_start(void); + +/* remove the hooks */ +int spu_sync_stop(void); + +/* Record SPU program counter samples to the oprofile event buffer. */ +void spu_sync_buffer(int spu_num, unsigned int * samples, +int num_samples); + +#endif// PR_UTIL_H Index: linux-2.6.19-rc6-arnd1+patches/arch/powerpc/oprofile/cell/spu_profiler.c === --- /dev/null 1970-01-01 00:00:00.0
Re: [Cbe-oss-dev] [PATCH]Add notification for active Cell SPU tasks
Luke Browning wrote: [EMAIL PROTECTED] wrote on 08/12/2006 01:04:30 PM: > Arnd Bergmann wrote: > > >On Wednesday 06 December 2006 23:04, Maynard Johnson wrote: > > > >No code should ever need to look at other SPUs when performing an > >operation on a given SPU, so we don't need to hold a global lock > >during normal operation. > > > >We have two cases that need to be handled: > > > >- on each context unload and load (both for a full switch operation), > > call to the profiling code with a pointer to the current context > > and spu (context is NULL when unloading). > > > > If the new context is not know yet, scan its overlay table (expensive) > > and store information about it in an oprofile private object. Otherwise > > just point to the currently active object, this should be really cheap. > > > >- When enabling oprofile initially, scan all contexts that are currently > > running on one of the SPUs. This is also expensive, but should happen > > before the measurement starts so it does not impact the resulting data. > > Agreed. > >>I'm not exactly sure what you're saying here. Are you suggesting that a > >>user may only be interested in acitve SPU notification and, therefore, > >>shouldn't have to be depenent on the "standard" notification > >>registration succeeding? There may be a case for adding a new > >>registration function, I suppose; although, I'm not aware of any other > >>users of the SPUFS notification mechanism besides OProfile and PDT, and > >>we need notification of both active and future SPU tasks. But I would > >>not object to a new function. > >> > >> > >> > >I think what Luke was trying to get to is that notify_spus_active() should > >not call blocking_notifier_call_chain(), since it will notify other users > >as well as the newly registered one. Instead, it can simply call the > >notifier function directly. > > > > > Ah, yes. Thanks to both of you for pointing that out. I'll fix that > and re-post. 
> > -Maynard > I actually was hoping to take this one step further. If the interface to the context switch handler is something like: switch_handler(int spu_id, from_ctx, to_ctx) The function prototype for the switch handler is set in concrete by the notification framework. The parameters are: struct notifier_block *, unsigned long, void *. The kernel extension can maintain an internal spu table of its own where it marks the named spuid as active or not. You don't need to have a bunch of individual calls. Internally, you can keep track of it yourself. I think this would be nice to have, and I will look into it as I have time. However, for the existing usage of the SPU switch notification, I don't think it's too critical, since most users are not going to be trying to do profiling or debugging with multiple SPU apps running simultaneously. Luke - To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [Cbe-oss-dev] [PATCH]Add notification for active Cell SPU tasks
Luke Browning wrote: [EMAIL PROTECTED] wrote on 08/12/2006 01:04:30 PM: Arnd Bergmann wrote: On Wednesday 06 December 2006 23:04, Maynard Johnson wrote: No code should ever need to look at other SPUs when performing an operation on a given SPU, so we don't need to hold a global lock during normal operation. We have two cases that need to be handled: - on each context unload and load (both for a full switch operation), call to the profiling code with a pointer to the current context and spu (context is NULL when unloading). If the new context is not know yet, scan its overlay table (expensive) and store information about it in an oprofile private object. Otherwise just point to the currently active object, this should be really cheap. - When enabling oprofile initially, scan all contexts that are currently running on one of the SPUs. This is also expensive, but should happen before the measurement starts so it does not impact the resulting data. Agreed. snip I'm not exactly sure what you're saying here. Are you suggesting that a user may only be interested in acitve SPU notification and, therefore, shouldn't have to be depenent on the standard notification registration succeeding? There may be a case for adding a new registration function, I suppose; although, I'm not aware of any other users of the SPUFS notification mechanism besides OProfile and PDT, and we need notification of both active and future SPU tasks. But I would not object to a new function. I think what Luke was trying to get to is that notify_spus_active() should not call blocking_notifier_call_chain(), since it will notify other users as well as the newly registered one. Instead, it can simply call the notifier function directly. Ah, yes. Thanks to both of you for pointing that out. I'll fix that and re-post. -Maynard I actually was hoping to take this one step further. 
If the interface to the context switch handler is something like: switch_handler(int spu_id, from_ctx, to_ctx) The function prototype for the switch handler is set in concrete by the notification framework. The parameters are: struct notifier_block *, unsigned long, void *. The kernel extension can maintain an internal spu table of its own where it marks the named spuid as active or not. You don't need to have a bunch of individual calls. Internally, you can keep track of it yourself. I think this would be nice to have, and I will look into it as I have time. However, for the existing usage of the SPU switch notification, I don't think it's too critical, since most users are not going to be trying to do profiling or debugging with multiple SPU apps running simultaneously. Luke - To unsubscribe from this list: send the line unsubscribe linux-kernel in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[Patch] Add necessary #includes to asm-powerpc/spu.h
Subject: Add necessary #includes to asm-powerpc/spu.h. From: Maynard Johnson <[EMAIL PROTECTED]> This patch adds a couple of #includes to asm-powerpc/spu.h to prevent compilation warnings that can occur when spu.h is included from a source file where fs.h and notifier.h have not been included already. Signed-off-by: Maynard Johnson <[EMAIL PROTECTED]> Index: linux-2.6.19-rc6-arnd1+patches/include/asm-powerpc/spu.h === --- linux-2.6.19-rc6-arnd1+patches.orig/include/asm-powerpc/spu.h 2006-12-04 10:56:17.406636032 -0600 +++ linux-2.6.19-rc6-arnd1+patches/include/asm-powerpc/spu.h 2006-12-08 10:38:10.069679312 -0600 @@ -24,6 +24,8 @@ #define _SPU_H #ifdef __KERNEL__ +#include +#include #include #include
Re: [PATCH]Add notification for active Cell SPU tasks -- updated patch
Subject: Enable SPU switch notification to detect currently active SPU tasks. From: Maynard Johnson <[EMAIL PROTECTED]> This patch adds to the capability of spu_switch_event_register to notify the caller of currently active SPU tasks. It also exports spu_switch_event_register and spu_switch_event_unregister. Signed-off-by: Maynard Johnson <[EMAIL PROTECTED]> Index: linux-2.6.19-rc6-arnd1+patches/arch/powerpc/platforms/cell/spufs/sched.c === --- linux-2.6.19-rc6-arnd1+patches.orig/arch/powerpc/platforms/cell/spufs/sched.c 2006-12-04 10:56:04.730698720 -0600 +++ linux-2.6.19-rc6-arnd1+patches/arch/powerpc/platforms/cell/spufs/sched.c 2006-12-08 09:04:40.558774376 -0600 @@ -84,15 +84,36 @@ ctx ? ctx->object_id : 0, spu); } +static void notify_spus_active(struct notifier_block * n) +{ + int node; + for (node = 0; node < MAX_NUMNODES; node++) { + struct spu *spu; + mutex_lock(_prio->active_mutex[node]); + list_for_each_entry(spu, _prio->active_list[node], list) { + struct spu_context *ctx = spu->ctx; + n->notifier_call(n, ctx ? ctx->object_id : 0, spu); + } + mutex_unlock(_prio->active_mutex[node]); + } + +} + int spu_switch_event_register(struct notifier_block * n) { - return blocking_notifier_chain_register(_switch_notifier, n); + int ret; + ret = blocking_notifier_chain_register(_switch_notifier, n); + if (!ret) + notify_spus_active(n); + return ret; } +EXPORT_SYMBOL_GPL(spu_switch_event_register); int spu_switch_event_unregister(struct notifier_block * n) { return blocking_notifier_chain_unregister(_switch_notifier, n); } +EXPORT_SYMBOL_GPL(spu_switch_event_unregister); static inline void bind_context(struct spu *spu, struct spu_context *ctx)
Re: [Cbe-oss-dev] [PATCH]Add notification for active Cell SPU tasks
Arnd Bergmann wrote: On Wednesday 06 December 2006 23:04, Maynard Johnson wrote: text(struct spu *spu, struct spu_context *ctx) Is this really the right strategy? First, it serializes all spu context switching at the node level. Second, it performs 17 callouts for I could be wrong, but I think if we moved the mutex_lock to be inside of the list_for_each_entry loop, we could have a race condition. For example, we obtain the next spu item from the spu_prio->active_mutex list, then wait on the mutex which is being held for the purpose of removing the very spu context we just obtained. every context switch. Can't oprofile internally derive the list of active spus from the context switch callout. Arnd would certainly know the answer to this off the top of his head, but when I initially discussed the idea for this patch with him (probably a couple months ago or so), he didn't suggest a better alternative. Perhaps there is a way to do this with current SPUFS code. Arnd, any comments on this? No code should ever need to look at other SPUs when performing an operation on a given SPU, so we don't need to hold a global lock during normal operation. We have two cases that need to be handled: - on each context unload and load (both for a full switch operation), call to the profiling code with a pointer to the current context and spu (context is NULL when unloading). If the new context is not know yet, scan its overlay table (expensive) and store information about it in an oprofile private object. Otherwise just point to the currently active object, this should be really cheap. - When enabling oprofile initially, scan all contexts that are currently running on one of the SPUs. This is also expensive, but should happen before the measurement starts so it does not impact the resulting data. Also, the notify_spus_active() callout is dependent on the return code of spu_switch_notify(). Should notification be hierarchical? 
If I only register for the second one, should my notification be dependent on the return code of some non-related subsystem's handler. I'm not exactly sure what you're saying here. Are you suggesting that a user may only be interested in acitve SPU notification and, therefore, shouldn't have to be depenent on the "standard" notification registration succeeding? There may be a case for adding a new registration function, I suppose; although, I'm not aware of any other users of the SPUFS notification mechanism besides OProfile and PDT, and we need notification of both active and future SPU tasks. But I would not object to a new function. I think what Luke was trying to get to is that notify_spus_active() should not call blocking_notifier_call_chain(), since it will notify other users as well as the newly registered one. Instead, it can simply call the notifier function directly. Ah, yes. Thanks to both of you for pointing that out. I'll fix that and re-post. -Maynard Arnd <>< - Take Surveys. Earn Cash. Influence the Future of IT Join SourceForge.net's Techsay panel and you'll get the chance to share your opinions on IT & business topics through brief surveys - and earn cash http://www.techsay.com/default.php?page=join.php=sourceforge=DEVDEV ___ oprofile-list mailing list [EMAIL PROTECTED] https://lists.sourceforge.net/lists/listinfo/oprofile-list - To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH]Add notification for active Cell SPU tasks -- updated patch
Subject: Enable SPU switch notification to detect currently active SPU tasks. From: Maynard Johnson [EMAIL PROTECTED] This patch adds to the capability of spu_switch_event_register to notify the caller of currently active SPU tasks. It also exports spu_switch_event_register and spu_switch_event_unregister. Signed-off-by: Maynard Johnson [EMAIL PROTECTED] Index: linux-2.6.19-rc6-arnd1+patches/arch/powerpc/platforms/cell/spufs/sched.c === --- linux-2.6.19-rc6-arnd1+patches.orig/arch/powerpc/platforms/cell/spufs/sched.c 2006-12-04 10:56:04.730698720 -0600 +++ linux-2.6.19-rc6-arnd1+patches/arch/powerpc/platforms/cell/spufs/sched.c 2006-12-08 09:04:40.558774376 -0600 @@ -84,15 +84,36 @@ ctx ? ctx-object_id : 0, spu); } +static void notify_spus_active(struct notifier_block * n) +{ + int node; + for (node = 0; node MAX_NUMNODES; node++) { + struct spu *spu; + mutex_lock(spu_prio-active_mutex[node]); + list_for_each_entry(spu, spu_prio-active_list[node], list) { + struct spu_context *ctx = spu-ctx; + n-notifier_call(n, ctx ? ctx-object_id : 0, spu); + } + mutex_unlock(spu_prio-active_mutex[node]); + } + +} + int spu_switch_event_register(struct notifier_block * n) { - return blocking_notifier_chain_register(spu_switch_notifier, n); + int ret; + ret = blocking_notifier_chain_register(spu_switch_notifier, n); + if (!ret) + notify_spus_active(n); + return ret; } +EXPORT_SYMBOL_GPL(spu_switch_event_register); int spu_switch_event_unregister(struct notifier_block * n) { return blocking_notifier_chain_unregister(spu_switch_notifier, n); } +EXPORT_SYMBOL_GPL(spu_switch_event_unregister); static inline void bind_context(struct spu *spu, struct spu_context *ctx)
[Patch] Add necessary #includes to asm-powerpc/spu.h
Subject: Add necessary #includes to asm-powerpc/spu.h. From: Maynard Johnson [EMAIL PROTECTED] This patch adds a couple of #includes to asm-powerpc/spu.h to prevent compilation warnings that can occur when spu.h is included from a source file where fs.h and notifier.h have not been included already. Signed-off-by: Maynard Johnson [EMAIL PROTECTED] Index: linux-2.6.19-rc6-arnd1+patches/include/asm-powerpc/spu.h === --- linux-2.6.19-rc6-arnd1+patches.orig/include/asm-powerpc/spu.h 2006-12-04 10:56:17.406636032 -0600 +++ linux-2.6.19-rc6-arnd1+patches/include/asm-powerpc/spu.h 2006-12-08 10:38:10.069679312 -0600 @@ -24,6 +24,8 @@ #define _SPU_H #ifdef __KERNEL__ +#include linux/fs.h +#include linux/notifier.h #include linux/workqueue.h #include linux/sysdev.h
Re: [PATCH]Add notification for active Cell SPU tasks
Luke Browning wrote: [EMAIL PROTECTED] wrote on 12/04/2006 10:26:57: > [EMAIL PROTECTED] wrote on > 01/12/2006 06:01:15 PM: > > > > > Subject: Enable SPU switch notification to detect currently activeSPU tasks. > > > > From: Maynard Johnson <[EMAIL PROTECTED]> > > > > This patch adds to the capability of spu_switch_event_register to notify the > > caller of currently active SPU tasks. It also exports > > spu_switch_event_register > > and spu_switch_event_unregister. > > > > Signed-off-by: Maynard Johnson <[EMAIL PROTECTED]> > > > > > > Index: linux-2.6.19-rc6- > > arnd1+patches/arch/powerpc/platforms/cell/spufs/sched.c > > === > > --- linux-2.6.19-rc6-arnd1+patches. > > orig/arch/powerpc/platforms/cell/spufs/sched.c 2006-11-24 11:34: > > 44.884455680 -0600 > > +++ linux-2.6.19-rc6- > > arnd1+patches/arch/powerpc/platforms/cell/spufs/sched.c 2006-12-01 > > 13:57:21.864583264 -0600 > > @@ -84,15 +84,37 @@ > > ctx ? ctx->object_id : 0, spu); > > } > > > > +static void notify_spus_active(void) > > +{ > > + int node; > > + for (node = 0; node < MAX_NUMNODES; node++) { > > + struct spu *spu; > > + mutex_lock(_prio->active_mutex[node]); > > + list_for_each_entry(spu, _prio->active_list[node], list) { > > + struct spu_context *ctx = spu->ctx; > > + blocking_notifier_call_chain(_switch_notifier, > > + ctx ? 
ctx->object_id : 0, spu); > > + } > > + mutex_unlock(_prio->active_mutex[node]); > > +} > > + > > +} > > + > > int spu_switch_event_register(struct notifier_block * n) > > { > > - return blocking_notifier_chain_register(_switch_notifier, n); > > + int ret; > > + ret = blocking_notifier_chain_register(_switch_notifier, n); > > + if (!ret) > > + notify_spus_active(); > > + return ret; > > } > > +EXPORT_SYMBOL_GPL(spu_switch_event_register); > > > > int spu_switch_event_unregister(struct notifier_block * n) > > { > > return blocking_notifier_chain_unregister(_switch_notifier, n); > > } > > +EXPORT_SYMBOL_GPL(spu_switch_event_unregister); > > > > > > static inline void bind_context(struct spu *spu, struct spu_context *ctx) > > Is this really the right strategy? First, it serializes all spu context > switching at the node level. Second, it performs 17 callouts for I could be wrong, but I think if we moved the mutex_lock to be inside of the list_for_each_entry loop, we could have a race condition. For example, we obtain the next spu item from the spu_prio->active_mutex list, then wait on the mutex which is being held for the purpose of removing the very spu context we just obtained. > every context > switch. Can't oprofile internally derive the list of active spus from the > context switch callout. Arnd would certainly know the answer to this off the top of his head, but when I initially discussed the idea for this patch with him (probably a couple months ago or so), he didn't suggest a better alternative. Perhaps there is a way to do this with current SPUFS code. Arnd, any comments on this? > > Also, the notify_spus_active() callout is dependent on the return code of > spu_switch_notify(). Should notification be hierarchical? If I > only register > for the second one, should my notification be dependent on the return code > of some non-related subsystem's handler. I'm not exactly sure what you're saying here. 
Are you suggesting that a user may only be interested in acitve SPU notification and, therefore, shouldn't have to be depenent on the "standard" notification registration succeeding? There may be a case for adding a new registration function, I suppose; although, I'm not aware of any other users of the SPUFS notification mechanism besides OProfile and PDT, and we need notification of both active and future SPU tasks. But I would not object to a new function. > > Does blocking_callchain_notifier internally check for the presence > of registered > handlers before it takes locks ...? We should ensure that there is > minimal overhead > when there are no registered handlers. I won't pretend to be expert enough to critique the performance of that code. > > Regards, > Luke___ Any comments to my questions above. Seems like oprofile / pdt could derive the list of acti
Re: [PATCH]Add notification for active Cell SPU tasks
Luke Browning wrote: [EMAIL PROTECTED] wrote on 12/04/2006 10:26:57: [EMAIL PROTECTED] wrote on 01/12/2006 06:01:15 PM: Subject: Enable SPU switch notification to detect currently activeSPU tasks. From: Maynard Johnson [EMAIL PROTECTED] This patch adds to the capability of spu_switch_event_register to notify the caller of currently active SPU tasks. It also exports spu_switch_event_register and spu_switch_event_unregister. Signed-off-by: Maynard Johnson [EMAIL PROTECTED] Index: linux-2.6.19-rc6- arnd1+patches/arch/powerpc/platforms/cell/spufs/sched.c === --- linux-2.6.19-rc6-arnd1+patches. orig/arch/powerpc/platforms/cell/spufs/sched.c 2006-11-24 11:34: 44.884455680 -0600 +++ linux-2.6.19-rc6- arnd1+patches/arch/powerpc/platforms/cell/spufs/sched.c 2006-12-01 13:57:21.864583264 -0600 @@ -84,15 +84,37 @@ ctx ? ctx-object_id : 0, spu); } +static void notify_spus_active(void) +{ + int node; + for (node = 0; node MAX_NUMNODES; node++) { + struct spu *spu; + mutex_lock(spu_prio-active_mutex[node]); + list_for_each_entry(spu, spu_prio-active_list[node], list) { + struct spu_context *ctx = spu-ctx; + blocking_notifier_call_chain(spu_switch_notifier, + ctx ? ctx-object_id : 0, spu); + } + mutex_unlock(spu_prio-active_mutex[node]); +} + +} + int spu_switch_event_register(struct notifier_block * n) { - return blocking_notifier_chain_register(spu_switch_notifier, n); + int ret; + ret = blocking_notifier_chain_register(spu_switch_notifier, n); + if (!ret) + notify_spus_active(); + return ret; } +EXPORT_SYMBOL_GPL(spu_switch_event_register); int spu_switch_event_unregister(struct notifier_block * n) { return blocking_notifier_chain_unregister(spu_switch_notifier, n); } +EXPORT_SYMBOL_GPL(spu_switch_event_unregister); static inline void bind_context(struct spu *spu, struct spu_context *ctx) Is this really the right strategy? First, it serializes all spu context switching at the node level. 
Second, it performs 17 callouts for I could be wrong, but I think if we moved the mutex_lock to be inside of the list_for_each_entry loop, we could have a race condition. For example, we obtain the next spu item from the spu_prio-active_mutex list, then wait on the mutex which is being held for the purpose of removing the very spu context we just obtained. every context switch. Can't oprofile internally derive the list of active spus from the context switch callout. Arnd would certainly know the answer to this off the top of his head, but when I initially discussed the idea for this patch with him (probably a couple months ago or so), he didn't suggest a better alternative. Perhaps there is a way to do this with current SPUFS code. Arnd, any comments on this? Also, the notify_spus_active() callout is dependent on the return code of spu_switch_notify(). Should notification be hierarchical? If I only register for the second one, should my notification be dependent on the return code of some non-related subsystem's handler. I'm not exactly sure what you're saying here. Are you suggesting that a user may only be interested in acitve SPU notification and, therefore, shouldn't have to be depenent on the standard notification registration succeeding? There may be a case for adding a new registration function, I suppose; although, I'm not aware of any other users of the SPUFS notification mechanism besides OProfile and PDT, and we need notification of both active and future SPU tasks. But I would not object to a new function. Does blocking_callchain_notifier internally check for the presence of registered handlers before it takes locks ...? We should ensure that there is minimal overhead when there are no registered handlers. I won't pretend to be expert enough to critique the performance of that code. Regards, Luke___ Any comments to my questions above. Seems like oprofile / pdt could derive the list of active spus from a single context switch callout. 
This patch will have a large impact on the performance of the system. For OProfile, the registration is only done at the time when a user starts the profiler to collect performance data, typically focusing on a single application, so I don't see this as an impact on normal production operations. Since you must have root authority to run OProfile, it cannot be invoked by just any user for nefarious purposes. -Maynard Luke ___ Linuxppc-dev mailing list [EMAIL PROTECTED] https://ozlabs.org/mailman/listinfo/linuxppc-dev - To unsubscribe
Re: [Cbe-oss-dev] [PATCH]Add notification for active Cell SPU tasks
Arnd Bergmann wrote: On Friday 01 December 2006 21:01, Maynard Johnson wrote: +static void notify_spus_active(void) +{ + int node; + for (node = 0; node < MAX_NUMNODES; node++) { + struct spu *spu; + mutex_lock(_prio->active_mutex[node]); + list_for_each_entry(spu, _prio->active_list[node], list) { +struct spu_context *ctx = spu->ctx; + blocking_notifier_call_chain(_switch_notifier, +ctx ? ctx->object_id : 0, spu); + } + mutex_unlock(_prio->active_mutex[node]); +} I wonder if this is enough for oprofile. Don't you need to access user space data of the task running on the SPU? I always assumed you want No, I don't need anything from the user app besides access to the executable binary, which I get with copy_from_user, specifying the 'objectid' as from address. to do it via get_user or copy_from_user, which obviously doesn't work here, when you're running in the oprofile task. Are you using something like access_process_vm now? Arnd <>< - To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [Cbe-oss-dev] [PATCH]Add notification for active Cell SPU tasks
Arnd Bergmann wrote: On Friday 01 December 2006 21:01, Maynard Johnson wrote: +static void notify_spus_active(void) +{ + int node; + for (node = 0; node MAX_NUMNODES; node++) { + struct spu *spu; + mutex_lock(spu_prio-active_mutex[node]); + list_for_each_entry(spu, spu_prio-active_list[node], list) { +struct spu_context *ctx = spu-ctx; + blocking_notifier_call_chain(spu_switch_notifier, +ctx ? ctx-object_id : 0, spu); + } + mutex_unlock(spu_prio-active_mutex[node]); +} I wonder if this is enough for oprofile. Don't you need to access user space data of the task running on the SPU? I always assumed you want No, I don't need anything from the user app besides access to the executable binary, which I get with copy_from_user, specifying the 'objectid' as from address. to do it via get_user or copy_from_user, which obviously doesn't work here, when you're running in the oprofile task. Are you using something like access_process_vm now? Arnd - To unsubscribe from this list: send the line unsubscribe linux-kernel in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCH]Add notification for active Cell SPU tasks
Subject: Enable SPU switch notification to detect currently active SPU tasks. From: Maynard Johnson <[EMAIL PROTECTED]> This patch adds to the capability of spu_switch_event_register to notify the caller of currently active SPU tasks. It also exports spu_switch_event_register and spu_switch_event_unregister. Signed-off-by: Maynard Johnson <[EMAIL PROTECTED]> Index: linux-2.6.19-rc6-arnd1+patches/arch/powerpc/platforms/cell/spufs/sched.c === --- linux-2.6.19-rc6-arnd1+patches.orig/arch/powerpc/platforms/cell/spufs/sched.c 2006-11-24 11:34:44.884455680 -0600 +++ linux-2.6.19-rc6-arnd1+patches/arch/powerpc/platforms/cell/spufs/sched.c 2006-12-01 13:57:21.864583264 -0600 @@ -84,15 +84,37 @@ ctx ? ctx->object_id : 0, spu); } +static void notify_spus_active(void) +{ + int node; + for (node = 0; node < MAX_NUMNODES; node++) { + struct spu *spu; + mutex_lock(&spu_prio->active_mutex[node]); + list_for_each_entry(spu, &spu_prio->active_list[node], list) { + struct spu_context *ctx = spu->ctx; + blocking_notifier_call_chain(&spu_switch_notifier, + ctx ? ctx->object_id : 0, spu); + } + mutex_unlock(&spu_prio->active_mutex[node]); + } + +} + int spu_switch_event_register(struct notifier_block * n) { - return blocking_notifier_chain_register(&spu_switch_notifier, n); + int ret; + ret = blocking_notifier_chain_register(&spu_switch_notifier, n); + if (!ret) + notify_spus_active(); + return ret; } +EXPORT_SYMBOL_GPL(spu_switch_event_register); int spu_switch_event_unregister(struct notifier_block * n) { return blocking_notifier_chain_unregister(&spu_switch_notifier, n); } +EXPORT_SYMBOL_GPL(spu_switch_event_unregister); static inline void bind_context(struct spu *spu, struct spu_context *ctx)
[PATCH]Add notification for active Cell SPU tasks
Subject: Enable SPU switch notification to detect currently active SPU tasks. From: Maynard Johnson [EMAIL PROTECTED] This patch adds to the capability of spu_switch_event_register to notify the caller of currently active SPU tasks. It also exports spu_switch_event_register and spu_switch_event_unregister. Signed-off-by: Maynard Johnson [EMAIL PROTECTED] Index: linux-2.6.19-rc6-arnd1+patches/arch/powerpc/platforms/cell/spufs/sched.c === --- linux-2.6.19-rc6-arnd1+patches.orig/arch/powerpc/platforms/cell/spufs/sched.c 2006-11-24 11:34:44.884455680 -0600 +++ linux-2.6.19-rc6-arnd1+patches/arch/powerpc/platforms/cell/spufs/sched.c 2006-12-01 13:57:21.864583264 -0600 @@ -84,15 +84,37 @@ ctx ? ctx-object_id : 0, spu); } +static void notify_spus_active(void) +{ + int node; + for (node = 0; node MAX_NUMNODES; node++) { + struct spu *spu; + mutex_lock(spu_prio-active_mutex[node]); + list_for_each_entry(spu, spu_prio-active_list[node], list) { + struct spu_context *ctx = spu-ctx; + blocking_notifier_call_chain(spu_switch_notifier, + ctx ? ctx-object_id : 0, spu); + } + mutex_unlock(spu_prio-active_mutex[node]); + } + +} + int spu_switch_event_register(struct notifier_block * n) { - return blocking_notifier_chain_register(spu_switch_notifier, n); + int ret; + ret = blocking_notifier_chain_register(spu_switch_notifier, n); + if (!ret) + notify_spus_active(); + return ret; } +EXPORT_SYMBOL_GPL(spu_switch_event_register); int spu_switch_event_unregister(struct notifier_block * n) { return blocking_notifier_chain_unregister(spu_switch_notifier, n); } +EXPORT_SYMBOL_GPL(spu_switch_event_unregister); static inline void bind_context(struct spu *spu, struct spu_context *ctx)
[PATCH 1/1]OProfile for Cell cleanup patch
This is a clean up patch that includes the following changes: -It removes some macro definitions that are only used once with the actual code. -Some comments were added to clarify the code based on feedback from the community. -The write_pm_cntrl() and set_count_mode() were passed a structure element from a global variable. The argument was removed so the functions now just operate on the global directly. -The set_pm_event() function call in the cell_virtual_cntr() routine was moved to a for-loop before the for_each_cpu loop Signed-off-by: Carl Love <[EMAIL PROTECTED]> Signed-off-by: Maynard Johnson <[EMAIL PROTECTED]> Index: linux-2.6.19-rc6-arnd1+patches/arch/powerpc/oprofile/op_model_cell.c === --- linux-2.6.19-rc6-arnd1+patches.orig/arch/powerpc/oprofile/op_model_cell.c 2006-11-24 11:34:45.048430752 -0600 +++ linux-2.6.19-rc6-arnd1+patches/arch/powerpc/oprofile/op_model_cell.c 2006-11-27 11:45:00.737473968 -0600 @@ -41,8 +41,12 @@ #define PPU_CYCLES_EVENT_NUM 1 /* event number for CYCLES */ #define CBE_COUNT_ALL_CYCLES 0x4280/* PPU cycle event specifier */ -#define NUM_THREADS 2 -#define VIRT_CNTR_SW_TIME_NS 1 // 0.5 seconds +#define NUM_THREADS 2 /* number of physical threads in + * physical processor + */ +#define NUM_TRACE_BUS_WORDS 4 +#define NUM_INPUT_BUS_WORDS 2 + struct pmc_cntrl_data { unsigned long vcntr; @@ -94,14 +98,6 @@ } pm_regs; -#define GET_SUB_UNIT(x) ((x & 0xf000) >> 12) -#define GET_BUS_WORD(x) ((x & 0x00f0) >> 4) -#define GET_BUS_TYPE(x) ((x & 0x0300) >> 8) -#define GET_POLARITY(x) ((x & 0x0002) >> 1) -#define GET_COUNT_CYCLES(x) (x & 0x0001) -#define GET_INPUT_CONTROL(x) ((x & 0x0004) >> 2) - - static DEFINE_PER_CPU(unsigned long[NR_PHYS_CTRS], pmc_values); static struct pmc_cntrl_data pmc_cntrl[NUM_THREADS][NR_PHYS_CTRS]; @@ -129,8 +125,8 @@ static u32 ctr_enabled; -static unsigned char trace_bus[4]; -static unsigned char input_bus[2]; +static unsigned char trace_bus[NUM_TRACE_BUS_WORDS]; +static unsigned char 
input_bus[NUM_INPUT_BUS_WORDS]; /* * Firmware interface functions @@ -221,24 +217,32 @@ pm_regs.pm07_cntrl[ctr] = 0; } - bus_word = GET_BUS_WORD(unit_mask); - bus_type = GET_BUS_TYPE(unit_mask); - count_cycles = GET_COUNT_CYCLES(unit_mask); - polarity = GET_POLARITY(unit_mask); - input_control = GET_INPUT_CONTROL(unit_mask); + bus_word = (unit_mask & 0x00f0) >> 4; + bus_type = (unit_mask & 0x0300) >> 8; + count_cycles = unit_mask & 0x0001; + polarity = (unit_mask & 0x0002) >> 1; + input_control = (unit_mask & 0x0004) >> 2; signal_bit = (event % 100); p = &(pm_signal[ctr]); p->signal_group = event / 100; p->bus_word = bus_word; - p->sub_unit = unit_mask & 0xf000; + p->sub_unit = (unit_mask & 0xf000) >> 12; pm_regs.pm07_cntrl[ctr] = 0; - pm_regs.pm07_cntrl[ctr] |= PM07_CTR_COUNT_CYCLES(count_cycles); - pm_regs.pm07_cntrl[ctr] |= PM07_CTR_POLARITY(polarity); - pm_regs.pm07_cntrl[ctr] |= PM07_CTR_INPUT_CONTROL(input_control); - + pm_regs.pm07_cntrl[ctr] |= (count_cycles & 1) << 23; + pm_regs.pm07_cntrl[ctr] |= (polarity & 1) << 24; + pm_regs.pm07_cntrl[ctr] |= (input_control & 1) << 25; + + /* Some of the islands signal selection is based on 64 bit words. +* The debug bus words are 32 bits, the input words to the performance +* counters are defined as 32 bits. Need to convert the 64 bit island +* specification to the appropriate 32 input bit and bus word for the +* performance counter event selection. See the CELL Performance +* monitoring signals manual and the Perf cntr hardware descriptions +* for the details. +*/ if (input_control == 0) { if (signal_bit > 31) { signal_bit -= 32; @@ -253,18 +257,18 @@ if ((bus_type == 1) && p->signal_group >= 50) bus_type = 0; - pm_regs.pm07_cntrl[ctr] |= PM07_CTR_INPUT_MUX(signal_bit); + pm_regs.pm07_cntrl[ctr] |= (signal_bit & 0x3F) << 26; } else { pm_regs.pm07_cntrl[ctr] = 0; p->bit = signal_bit; } - for (i = 0; i < 4; i++) { + for (i = 0; i < NUM_TRACE_BUS_WORDS; i++) { if (bus_word & (1 << i)) { pm_regs.debug_bus_control |=
[PATCH 0/1]OProfile for Cell cleanup patch
I will be posting a Cell-OProfile cleanup patch against Arnd Bergmann's 2.6.19-rc6-arnd1 tree (see http://kernel.org/pub/linux/kernel/people/arnd/patches/2.6.19-rc6-arnd1/). Thanks in advance for any comments provided. -Maynard - To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCH 1/1]OProfile for Cell cleanup patch
This is a clean up patch that includes the following changes: -It removes some macro definitions that are only used once with the actual code. -Some comments were added to clarify the code based on feedback from the community. -The write_pm_cntrl() and set_count_mode() were passed a structure element from a global variable. The argument was removed so the functions now just operate on the global directly. -The set_pm_event() function call in the cell_virtual_cntr() routine was moved to a for-loop before the for_each_cpu loop Signed-off-by: Carl Love [EMAIL PROTECTED] Signed-off-by: Maynard Johnson [EMAIL PROTECTED] Index: linux-2.6.19-rc6-arnd1+patches/arch/powerpc/oprofile/op_model_cell.c === --- linux-2.6.19-rc6-arnd1+patches.orig/arch/powerpc/oprofile/op_model_cell.c 2006-11-24 11:34:45.048430752 -0600 +++ linux-2.6.19-rc6-arnd1+patches/arch/powerpc/oprofile/op_model_cell.c 2006-11-27 11:45:00.737473968 -0600 @@ -41,8 +41,12 @@ #define PPU_CYCLES_EVENT_NUM 1 /* event number for CYCLES */ #define CBE_COUNT_ALL_CYCLES 0x4280/* PPU cycle event specifier */ -#define NUM_THREADS 2 -#define VIRT_CNTR_SW_TIME_NS 1 // 0.5 seconds +#define NUM_THREADS 2 /* number of physical threads in + * physical processor + */ +#define NUM_TRACE_BUS_WORDS 4 +#define NUM_INPUT_BUS_WORDS 2 + struct pmc_cntrl_data { unsigned long vcntr; @@ -94,14 +98,6 @@ } pm_regs; -#define GET_SUB_UNIT(x) ((x 0xf000) 12) -#define GET_BUS_WORD(x) ((x 0x00f0) 4) -#define GET_BUS_TYPE(x) ((x 0x0300) 8) -#define GET_POLARITY(x) ((x 0x0002) 1) -#define GET_COUNT_CYCLES(x) (x 0x0001) -#define GET_INPUT_CONTROL(x) ((x 0x0004) 2) - - static DEFINE_PER_CPU(unsigned long[NR_PHYS_CTRS], pmc_values); static struct pmc_cntrl_data pmc_cntrl[NUM_THREADS][NR_PHYS_CTRS]; @@ -129,8 +125,8 @@ static u32 ctr_enabled; -static unsigned char trace_bus[4]; -static unsigned char input_bus[2]; +static unsigned char trace_bus[NUM_TRACE_BUS_WORDS]; +static unsigned char input_bus[NUM_INPUT_BUS_WORDS]; /* * Firmware interface 
functions @@ -221,24 +217,32 @@ pm_regs.pm07_cntrl[ctr] = 0; } - bus_word = GET_BUS_WORD(unit_mask); - bus_type = GET_BUS_TYPE(unit_mask); - count_cycles = GET_COUNT_CYCLES(unit_mask); - polarity = GET_POLARITY(unit_mask); - input_control = GET_INPUT_CONTROL(unit_mask); + bus_word = (unit_mask 0x00f0) 4; + bus_type = (unit_mask 0x0300) 8; + count_cycles = unit_mask 0x0001; + polarity = (unit_mask 0x0002) 1; + input_control = (unit_mask 0x0004) 2; signal_bit = (event % 100); p = (pm_signal[ctr]); p-signal_group = event / 100; p-bus_word = bus_word; - p-sub_unit = unit_mask 0xf000; + p-sub_unit = (unit_mask 0xf000) 12; pm_regs.pm07_cntrl[ctr] = 0; - pm_regs.pm07_cntrl[ctr] |= PM07_CTR_COUNT_CYCLES(count_cycles); - pm_regs.pm07_cntrl[ctr] |= PM07_CTR_POLARITY(polarity); - pm_regs.pm07_cntrl[ctr] |= PM07_CTR_INPUT_CONTROL(input_control); - + pm_regs.pm07_cntrl[ctr] |= (count_cycles 1) 23; + pm_regs.pm07_cntrl[ctr] |= (polarity 1) 24; + pm_regs.pm07_cntrl[ctr] |= (input_control 1) 25; + + /* Some of the islands signal selection is based on 64 bit words. +* The debug bus words are 32 bits, the input words to the performance +* counters are defined as 32 bits. Need to convert the 64 bit island +* specification to the appropriate 32 input bit and bus word for the +* performance counter event selection. See the CELL Performance +* monitoring signals manual and the Perf cntr hardware descriptions +* for the details. 
+*/ if (input_control == 0) { if (signal_bit 31) { signal_bit -= 32; @@ -253,18 +257,18 @@ if ((bus_type == 1) p-signal_group = 50) bus_type = 0; - pm_regs.pm07_cntrl[ctr] |= PM07_CTR_INPUT_MUX(signal_bit); + pm_regs.pm07_cntrl[ctr] |= (signal_bit 0x3F) 26; } else { pm_regs.pm07_cntrl[ctr] = 0; p-bit = signal_bit; } - for (i = 0; i 4; i++) { + for (i = 0; i NUM_TRACE_BUS_WORDS; i++) { if (bus_word (1 i)) { pm_regs.debug_bus_control |= (bus_type (31 - (2 * i) + 1)); - for (j = 0; j 2; j++) { + for (j = 0; j NUM_INPUT_BUS_WORDS; j++) { if (input_bus[j] == 0xff) { input_bus[j] = i
[PATCH 0/1]OProfile for Cell cleanup patch
I will be posting a Cell-OProfile cleanup patch against Arnd Bergmann's 2.6.19-rc6-arnd1 tree (see http://kernel.org/pub/linux/kernel/people/arnd/patches/2.6.19-rc6-arnd1/). Thanks in advance for any comments provided. -Maynard - To unsubscribe from this list: send the line unsubscribe linux-kernel in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [RFC, Patch 1/1] OProfile for Cell: Initial profiling support -- updated patch
Add PPU event-based and cycle-based profiling support to Oprofile for Cell. Oprofile is expected to collect data on all CPUs simultaneously. However, there is one set of performance counters per node. There are two hardware threads or virtual CPUs on each node. Hence, OProfile must multiplex in time the performance counter collection on the two virtual CPUs. The multiplexing of the performance counters is done by a virtual counter routine. Initially, the counters are configured to collect data on the even CPUs in the system, one CPU per node. In order to capture the PC for the virtual CPU when the performance counter interrupt occurs (the specified number of events between samples has occurred), the even processors are configured to handle the performance counter interrupts for their node. The virtual counter routine is called via a kernel timer after the virtual sample time. The routine stops the counters, saves the current counts, loads the last counts for the other virtual CPU on the node, sets interrupts to be handled by the other virtual CPU and restarts the counters, the virtual timer routine is scheduled to run again. The virtual sample time is kept relatively small to make sure sampling occurs on both CPUs on the node with a relatively small granularity. Whenever the counters overflow, the performance counter interrupt is called to collect the PC for the CPU where data is being collected. The oprofile driver relies on a firmware RTAS call to setup the debug bus to route the desired signals to the performance counter hardware to be counted. The RTAS call must set the routing registers appropriately in each of the islands to pass the signals down the debug bus as well as routing the signals from a particular island onto the bus. There is a second firmware RTAS call to reset the debug bus to the non pass thru state when the counters are not in use. 
Signed-Off-By: Carl Love <[EMAIL PROTECTED]> Signed-Off-By: Maynard Johnson <[EMAIL PROTECTED]> Index: linux-2.6.18/arch/powerpc/configs/cell_defconfig === --- linux-2.6.18.orig/arch/powerpc/configs/cell_defconfig 2006-11-15 10:44:42.880315592 -0600 +++ linux-2.6.18/arch/powerpc/configs/cell_defconfig2006-11-16 09:13:23.929208256 -0600 @@ -1099,7 +1099,8 @@ # # Instrumentation Support # -# CONFIG_PROFILING is not set +CONFIG_PROFILING=y +CONFIG_OPROFILE=y # CONFIG_KPROBES is not set # Index: linux-2.6.18/arch/powerpc/kernel/cputable.c === --- linux-2.6.18.orig/arch/powerpc/kernel/cputable.c2006-11-15 10:44:40.584264280 -0600 +++ linux-2.6.18/arch/powerpc/kernel/cputable.c 2006-11-16 09:13:23.932207800 -0600 @@ -304,6 +304,9 @@ PPC_FEATURE_SMT, .icache_bsize = 128, .dcache_bsize = 128, + .num_pmcs = 4, + .oprofile_cpu_type = "ppc64/cell-be", + .oprofile_type = PPC_OPROFILE_CELL, .platform = "ppc-cell-be", }, { /* PA Semi PA6T */ Index: linux-2.6.18/arch/powerpc/oprofile/common.c === --- linux-2.6.18.orig/arch/powerpc/oprofile/common.c2006-11-15 10:44:40.168195360 -0600 +++ linux-2.6.18/arch/powerpc/oprofile/common.c 2006-11-16 09:13:23.934207496 -0600 @@ -69,7 +69,10 @@ static int op_powerpc_start(void) { - on_each_cpu(op_powerpc_cpu_start, NULL, 0, 1); + if (model->global_start) + model->global_start(ctr); + if (model->start) + on_each_cpu(op_powerpc_cpu_start, NULL, 0, 1); return 0; } @@ -80,7 +83,10 @@ static void op_powerpc_stop(void) { - on_each_cpu(op_powerpc_cpu_stop, NULL, 0, 1); + if (model->stop) + on_each_cpu(op_powerpc_cpu_stop, NULL, 0, 1); +if (model->global_stop) +model->global_stop(); } static int op_powerpc_create_files(struct super_block *sb, struct dentry *root) @@ -141,6 +147,11 @@ switch (cur_cpu_spec->oprofile_type) { #ifdef CONFIG_PPC64 +#ifdef CONFIG_PPC_CELL + case PPC_OPROFILE_CELL: + model = &op_model_cell; + break; +#endif case PPC_OPROFILE_RS64: model = &op_model_rs64; break; Index: linux-2.6.18/arch/powerpc/oprofile/Makefile === --- 
linux-2.6.18.orig/arch/powerpc/oprofile/Makefile2006-11-15 10:44:40.166195664 -0600 +++ linux-2.6.18/arch/powerpc/oprofile/Makefile 2006-11-16 09:13:23.935207344 -0600 @@ -11,6 +11,7 @@ timer_int.o ) oprofile-y := $(DRIVER_OBJS) common.o backtrace.o +oprofile-$(CONFIG_PPC_CELL) += op_mode
Re: [RFC, Patch 0/1] OProfile for Cell: Initial profiling support -- new patch
Based on review comments received so far, we will be posting an updated kernel patch for OProfile for Cell. Most notably, this patch removes some racey conditions between our "virtual counter" function and the interrupt handler, as well as fixing the way we were restarting and stopping our timer for the virtual counter. Thanks. -Maynard Maynard Johnson wrote: Hello, I will be posting a patch that updates the OProfile kernel driver to enable it for the Cell Broadband Engine processor. The patch is based on Arnd Bergmann's arnd6 patchset for 2.6.18 (http://kernel.org/pub/linux/kernel/people/arnd/patches/2.6.18-arnd6/), with Kevin Corry's cbe_pmu_interrupt patch on applied on top. Kevin Corry's patch was submitted to the mailing lists earlier today and can be found at: http://marc.theaimsgroup.com/?l=linux-kernel&m=116360639928471&w=2). I will also post an OProfile userspace patch to the oprofile-list that adds the necessary support for the Cell processor. Thanks in advance for your review of this patch. Maynard Johnson LTC Power Linux Toolchain 507-253-2650 - Take Surveys. Earn Cash. Influence the Future of IT Join SourceForge.net's Techsay panel and you'll get the chance to share your opinions on IT & business topics through brief surveys - and earn cash http://www.techsay.com/default.php?page=join.php&p=sourceforge&CID=DEVDEV ___ oprofile-list mailing list [EMAIL PROTECTED] https://lists.sourceforge.net/lists/listinfo/oprofile-list - To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [RFC, Patch 0/1] OProfile for Cell: Initial profiling support -- new patch
Based on review comments received so far, we will be posting an updated kernel patch for OProfile for Cell. Most notably, this patch removes some racey conditions between our virtual counter function and the interrupt handler, as well as fixing the way we were restarting and stopping our timer for the virtual counter. Thanks. -Maynard Maynard Johnson wrote: Hello, I will be posting a patch that updates the OProfile kernel driver to enable it for the Cell Broadband Engine processor. The patch is based on Arnd Bergmann's arnd6 patchset for 2.6.18 (http://kernel.org/pub/linux/kernel/people/arnd/patches/2.6.18-arnd6/), with Kevin Corry's cbe_pmu_interrupt patch on applied on top. Kevin Corry's patch was submitted to the mailing lists earlier today and can be found at: http://marc.theaimsgroup.com/?l=linux-kernelm=116360639928471w=2). I will also post an OProfile userpsace patch to the oprofile-list that adds the necessary support for the Cell processor. Thanks in advance for your review of this patch. Maynard Johnson LTC Power Linux Toolchain 507-253-2650 - Take Surveys. Earn Cash. Influence the Future of IT Join SourceForge.net's Techsay panel and you'll get the chance to share your opinions on IT business topics through brief surveys - and earn cash http://www.techsay.com/default.php?page=join.phpp=sourceforgeCID=DEVDEV ___ oprofile-list mailing list [EMAIL PROTECTED] https://lists.sourceforge.net/lists/listinfo/oprofile-list - To unsubscribe from this list: send the line unsubscribe linux-kernel in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [RFC, Patch 1/1] OProfile for Cell: Initial profiling support -- updated patch
Add PPU event-based and cycle-based profiling support to Oprofile for Cell. Oprofile is expected to collect data on all CPUs simultaneously. However, there is one set of performance counters per node. There are two hardware threads or virtual CPUs on each node. Hence, OProfile must multiplex in time the performance counter collection on the two virtual CPUs. The multiplexing of the performance counters is done by a virtual counter routine. Initially, the counters are configured to collect data on the even CPUs in the system, one CPU per node. In order to capture the PC for the virtual CPU when the performance counter interrupt occurs (the specified number of events between samples has occurred), the even processors are configured to handle the performance counter interrupts for their node. The virtual counter routine is called via a kernel timer after the virtual sample time. The routine stops the counters, saves the current counts, loads the last counts for the other virtual CPU on the node, sets interrupts to be handled by the other virtual CPU and restarts the counters, the virtual timer routine is scheduled to run again. The virtual sample time is kept relatively small to make sure sampling occurs on both CPUs on the node with a relatively small granularity. Whenever the counters overflow, the performance counter interrupt is called to collect the PC for the CPU where data is being collected. The oprofile driver relies on a firmware RTAS call to setup the debug bus to route the desired signals to the performance counter hardware to be counted. The RTAS call must set the routing registers appropriately in each of the islands to pass the signals down the debug bus as well as routing the signals from a particular island onto the bus. There is a second firmware RTAS call to reset the debug bus to the non pass thru state when the counters are not in use. 
Signed-Off-By: Carl Love [EMAIL PROTECTED] Signed-Off-By: Maynard Johnson [EMAIL PROTECTED] Index: linux-2.6.18/arch/powerpc/configs/cell_defconfig === --- linux-2.6.18.orig/arch/powerpc/configs/cell_defconfig 2006-11-15 10:44:42.880315592 -0600 +++ linux-2.6.18/arch/powerpc/configs/cell_defconfig2006-11-16 09:13:23.929208256 -0600 @@ -1099,7 +1099,8 @@ # # Instrumentation Support # -# CONFIG_PROFILING is not set +CONFIG_PROFILING=y +CONFIG_OPROFILE=y # CONFIG_KPROBES is not set # Index: linux-2.6.18/arch/powerpc/kernel/cputable.c === --- linux-2.6.18.orig/arch/powerpc/kernel/cputable.c2006-11-15 10:44:40.584264280 -0600 +++ linux-2.6.18/arch/powerpc/kernel/cputable.c 2006-11-16 09:13:23.932207800 -0600 @@ -304,6 +304,9 @@ PPC_FEATURE_SMT, .icache_bsize = 128, .dcache_bsize = 128, + .num_pmcs = 4, + .oprofile_cpu_type = ppc64/cell-be, + .oprofile_type = PPC_OPROFILE_CELL, .platform = ppc-cell-be, }, { /* PA Semi PA6T */ Index: linux-2.6.18/arch/powerpc/oprofile/common.c === --- linux-2.6.18.orig/arch/powerpc/oprofile/common.c2006-11-15 10:44:40.168195360 -0600 +++ linux-2.6.18/arch/powerpc/oprofile/common.c 2006-11-16 09:13:23.934207496 -0600 @@ -69,7 +69,10 @@ static int op_powerpc_start(void) { - on_each_cpu(op_powerpc_cpu_start, NULL, 0, 1); + if (model-global_start) + model-global_start(ctr); + if (model-start) + on_each_cpu(op_powerpc_cpu_start, NULL, 0, 1); return 0; } @@ -80,7 +83,10 @@ static void op_powerpc_stop(void) { - on_each_cpu(op_powerpc_cpu_stop, NULL, 0, 1); + if (model-stop) + on_each_cpu(op_powerpc_cpu_stop, NULL, 0, 1); +if (model-global_stop) +model-global_stop(); } static int op_powerpc_create_files(struct super_block *sb, struct dentry *root) @@ -141,6 +147,11 @@ switch (cur_cpu_spec-oprofile_type) { #ifdef CONFIG_PPC64 +#ifdef CONFIG_PPC_CELL + case PPC_OPROFILE_CELL: + model = op_model_cell; + break; +#endif case PPC_OPROFILE_RS64: model = op_model_rs64; break; Index: linux-2.6.18/arch/powerpc/oprofile/Makefile === --- 
linux-2.6.18.orig/arch/powerpc/oprofile/Makefile2006-11-15 10:44:40.166195664 -0600 +++ linux-2.6.18/arch/powerpc/oprofile/Makefile 2006-11-16 09:13:23.935207344 -0600 @@ -11,6 +11,7 @@ timer_int.o ) oprofile-y := $(DRIVER_OBJS) common.o backtrace.o +oprofile-$(CONFIG_PPC_CELL) += op_model_cell.o oprofile-$(CONFIG_PPC64) += op_model_rs64.o