[PATCH] Make the scsi_wait_scan module only if SCSI_SCAN_ASYNC is enabled.
Allow for the possibility that even if modules are enabled that scsi_wait_scan is only made if SCSI_SCAN_ASYNC is enabled. Signed-off-by: Wink Saville <[EMAIL PROTECTED]> --- drivers/scsi/Kconfig |1 + 1 files changed, 1 insertions(+), 0 deletions(-) diff --git a/drivers/scsi/Kconfig b/drivers/scsi/Kconfig index e62d23f..4ba6ee5 100644 --- a/drivers/scsi/Kconfig +++ b/drivers/scsi/Kconfig @@ -244,6 +244,7 @@ config SCSI_SCAN_ASYNC config SCSI_WAIT_SCAN tristate default m + depends on SCSI_SCAN_ASYNC depends on SCSI depends on MODULES -- 1.5.2.rc0.g520d - To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCH 2/3] Trec driver.
This is the Trec driver, Makefile, header files. Enable trec in Kernel hacking configuration menu. Signed-off-by: Wink Saville <[EMAIL PROTECTED]> --- drivers/Makefile |1 + drivers/trec/Makefile |5 + drivers/trec/trec.c| 404 include/asm-generic/trec.h | 17 ++ include/asm-i386/trec.h| 33 include/asm-x86_64/trec.h | 13 ++ include/linux/trec.h | 75 lib/Kconfig.debug |7 + 8 files changed, 555 insertions(+), 0 deletions(-) create mode 100644 drivers/trec/Makefile create mode 100644 drivers/trec/trec.c create mode 100644 include/asm-generic/trec.h create mode 100644 include/asm-i386/trec.h create mode 100644 include/asm-x86_64/trec.h create mode 100644 include/linux/trec.h diff --git a/drivers/Makefile b/drivers/Makefile index 3a718f5..01724c0 100644 --- a/drivers/Makefile +++ b/drivers/Makefile @@ -81,3 +81,4 @@ obj-$(CONFIG_GENERIC_TIME)+= clocksource/ obj-$(CONFIG_DMA_ENGINE) += dma/ obj-$(CONFIG_HID) += hid/ obj-$(CONFIG_PPC_PS3) += ps3/ +obj-$(CONFIG_TREC) += trec/ diff --git a/drivers/trec/Makefile b/drivers/trec/Makefile new file mode 100644 index 000..d930b4d --- /dev/null +++ b/drivers/trec/Makefile @@ -0,0 +1,5 @@ +# +# Makefile for Trace records. +# + +obj-$(CONFIG_TREC) += trec.o diff --git a/drivers/trec/trec.c b/drivers/trec/trec.c new file mode 100644 index 000..0b04b71 --- /dev/null +++ b/drivers/trec/trec.c @@ -0,0 +1,404 @@ +/* + * Copyright (C) 2007 Saville Software, Inc. + * + * This code may be used for any purpose whatsoever, + * but no warranty of any kind is provided. + */ + +#include +#include +#include +#include +#include +#include +#include + +#include + +#define TREC_DEBUG +#ifdef TREC_DEBUG +#define DPK(fmt, args...) printk(KERN_ERR "trec " fmt, ## args) +#else +#define DPK(fmt, args...) 
+#endif + +struct trec_dev_struct +{ + struct cdevcdev; /* Character device structure */ +}; + +MODULE_AUTHOR("Wink Saville"); +MODULE_LICENSE("Dual BSD/GPL"); + +/* + * Module parameters + */ +int major = 240; /* 240 a "local/expermental" device number for the moment */ +int minor = 1; + +module_param(major, int, S_IRUGO); +module_param(minor, int, S_IRUGO); + +/* + * Forward declarations + */ +static int trec_open(struct inode *inode, struct file *file); +static int trec_release(struct inode *inode, struct file *file); + +/* + * File operations + */ +struct file_operations trec_f_ops = { + .owner = THIS_MODULE, + .open = trec_open, + .release= trec_release, +}; + +struct trec_struct { + uint64_ttsc; + unsigned long pc; + unsigned long tsk; + unsigned intpid; + unsigned long v1; + unsigned long v2; +}; + +/* + * Change trec_buffer_struct.data to be a pointer to a PAGE in the future + */ +#define TREC_DATA_SIZE 0x200 +struct trec_buffer_struct { + struct trec_buffer_struct * next; + struct trec_struct *cur; + struct trec_struct *end; + struct trec_struct data[TREC_DATA_SIZE]; +}; + +/* + * Number of buffers must be a multiple of two so we can + * snapshot the buffers and the minimum should be 4. 
+ */ +#defineTREC_COUNT 2 +struct trec_buffer_struct trec_buffers[2][TREC_COUNT]; +inttrec_idx = 0; +spinlock_t trec_lock = SPIN_LOCK_UNLOCKED; + +struct trec_buffer_struct *trec_buffer_cur = NULL; +struct trec_buffer_struct *trec_buffer_snapshot = NULL; + +struct trec_dev_struct trec_dev; + +/** + * Print an address symbol if available to the buffer + * this is from traps.c + */ +static int snprint_address(char *b, int bsize, unsigned long address) +{ +#ifdef CONFIG_KALLSYMS + unsigned long offset = 0, symsize; + const char *symname; + char *modname; + char *delim = ":"; + int n; + char namebuf[128]; + + symname = kallsyms_lookup(address, &symsize, &offset, &modname, namebuf); + if (!symname) { + n = 0; + } else { + if (!modname) + modname = delim = ""; + n = snprintf(b, bsize, "0x%016lx %s%s%s%s+0x%lx/0x%lx", + address, delim, modname, delim, symname, offset, symsize); + } + return n; +#else + return snprintf(b, bsize, "0x%016lx", address); +#endif +} + +/* + * Initialize the trec buffers + */ +void trec_init(void) +{ + int i; + int j; + + DPK("trec: trec_init E\n"); + + for (i = 0; i < 2; i++) { + for (j = 0; j < TREC_COUNT; j++) { + struct trec_buf
[PATCH 3/3] Initialize and use trec_snapshot and trec_print_snapshot.
Trec's are initialized early in main.c and then dump trec's in die(), panic() and do_page_fault(). Signed-off-by: Wink Saville <[EMAIL PROTECTED]> --- arch/x86_64/kernel/traps.c |5 + arch/x86_64/mm/fault.c |6 ++ init/main.c|4 kernel/panic.c |5 + 4 files changed, 20 insertions(+), 0 deletions(-) diff --git a/arch/x86_64/kernel/traps.c b/arch/x86_64/kernel/traps.c index 09d2e8a..b4f1a36 100644 --- a/arch/x86_64/kernel/traps.c +++ b/arch/x86_64/kernel/traps.c @@ -32,6 +32,7 @@ #include #include #include +#include #include #include @@ -547,9 +548,13 @@ void die(const char * str, struct pt_regs * regs, long err) { unsigned long flags = oops_begin(); + trec_snapshot(); + if (!user_mode(regs)) report_bug(regs->rip); + trec_print_snapshot(); + __die(str, regs, err); oops_end(flags); do_exit(SIGSEGV); diff --git a/arch/x86_64/mm/fault.c b/arch/x86_64/mm/fault.c index 6ada723..e92f6bc 100644 --- a/arch/x86_64/mm/fault.c +++ b/arch/x86_64/mm/fault.c @@ -24,6 +24,7 @@ #include #include #include +#include #include #include @@ -535,6 +536,8 @@ no_context: flags = oops_begin(); + trec_snapshot(); + if (address < PAGE_SIZE) printk(KERN_ALERT "Unable to handle kernel NULL pointer dereference"); else @@ -548,6 +551,9 @@ no_context: __die("Oops", regs, error_code); /* Executive summary in case the body of the oops scrolled away */ printk(KERN_EMERG "CR2: %016lx\n", address); + + trec_print_snapshot(); + oops_end(flags); do_exit(SIGKILL); diff --git a/init/main.c b/init/main.c index a92989e..46bc440 100644 --- a/init/main.c +++ b/init/main.c @@ -54,6 +54,7 @@ #include #include #include +#include #include #include @@ -517,6 +518,9 @@ asmlinkage void __init start_kernel(void) early_boot_irqs_off(); early_init_irq_lock_class(); + trec_init(); + TREC0(); + /* * Interrupts are still disabled. 
Do necessary setups, then * enable them diff --git a/kernel/panic.c b/kernel/panic.c index 623d182..52812f2 100644 --- a/kernel/panic.c +++ b/kernel/panic.c @@ -19,6 +19,7 @@ #include #include #include +#include int panic_on_oops; int tainted; @@ -66,6 +67,8 @@ NORET_TYPE void panic(const char * fmt, ...) unsigned long caller = (unsigned long) __builtin_return_address(0); #endif + trec_snapshot(); + /* * It's possible to come here directly from a panic-assertion and not * have preempt disabled. Some functions called from here want @@ -96,6 +99,8 @@ NORET_TYPE void panic(const char * fmt, ...) smp_send_stop(); #endif + trec_print_snapshot(); + atomic_notifier_call_chain(&panic_notifier_list, 0, buf); if (!panic_blink) -- 1.5.0.rc2 - To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCH 1/3] Documentation for trace records (trec).
Trec is a light weight tracing mechanism that places trace information into a buffer. The contents of the buffer are dumped when errors occur or when enabled via SYSRQ commands. Signed-off-by: Wink Saville <[EMAIL PROTECTED]> --- Documentation/trec.txt | 87 1 files changed, 87 insertions(+), 0 deletions(-) create mode 100644 Documentation/trec.txt diff --git a/Documentation/trec.txt b/Documentation/trec.txt new file mode 100644 index 000..2275edd --- /dev/null +++ b/Documentation/trec.txt @@ -0,0 +1,87 @@ +Title : Trace Records +Authors: Wink Saville <[EMAIL PROTECTED]> + +CONTENTS + +1. Concepts +2. Architectures Supported +3. Configuring +4. API Reference +5. Overhead +6. TODO + + +1. Concepts + +Trace records are a light weight tracing technique that time stamps +small amounts of information and stores them in a buffer. TREC's are +light enough that they may be sprinkled most anywhere in the kernel +and have very little performance impact. + +For instance they can be placed in the scheduler and ISR's to watch +the interaction between ISR's and the scheduler. They can be placed +in memory handling routines to determine how and when memory is +allocated and freed. + +In the current default configuration the trec's are dumped by calling +trec_print_snapshot when die() or panic() are called as well as when +the kernel itself page faults in do_page_fault. + +If CONFIG_MAGIC_SYSRQ is enabled the 'y' command will execute trec_snapshot +and the 'z' command will print the current snapshot. + +A general macro TREC allows trec_write to be invoked as a macro and +TRECC allows it to be invoked conditionally. See include/linux/trec.h +for the current set of macros. + +2. Architectures Supported + +Should support all architectures has been tested only on: + +- X86_64 + + +3. Configuring + +Since trec's are implemented as a device driver they are configured +by enabling support in the "Device Drivers" section of as they could +be used early being a module is not supported. + + +4. 
API Reference + +Trec supports the following API: + +void trec_init(void): + + Initialize the module, this may be called before the driver is loaded + if it is desired to use trec's early. + +void trec_write(unsigned long pc, int pid, unsigned long v1, unsigned long v2); + + This is the routine used to write into the buffer. pc is the program counter + pid is the process id and v1 and v2 are two parameters. + +void trec_snapshot(void); + + Calling this function takes a snapshot of the current trec buffer so that it + will not be modified. This is called prior to printing the snapshot via + trec_print_snapshot. + +void trec_print_snapshot(void); + + Print the snapshot. + +5. Overhead + +Measured on a 2.4GHZ Core 2 Duo the readings between two TREC's is +270 ticks of the rdtsc or about 0.1us. No attempt has been made to +optimize and less information can be collected if the overhead +is still too high. + + +6. TODO + +a. Add code to dump trec to user space +b. Enhance to allow runtime registration and runtime enable disable. + -- 1.5.0.rc2 - To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH 2/7] Initial implementation of the trec driver and include files
On 3/21/07, Johannes Weiner <[EMAIL PROTECTED]> wrote: Hi, On Wed, Mar 21, 2007 at 09:49:12AM -0700, Wink Saville wrote: > >>Please don't use camel-case - in general. > >> > Would p_next, p_cur and p_end be OK? I think it's generally disliked. Quoting Documentation/CodingStyle: ``Encoding the type of a function into the name (so-called Hungarian notation) is brain damaged - the compiler knows the types anyway and can check those, and it only confuses the programmer. No wonder MicroSoft makes buggy programs.'' I'll change it to; next, cur, end (when in rome do as the romans) You can implement a sysctl-setting to enable/disable the whole system (which then would be runtime changeable). Generally the way I've used this in the kernel is sprinkle them liberally where I'm trying to track down a bug or understand how some code works and then disable some of them using ZREC's but then reenable as necessary. Finally, I remove them all usually by reverting to the original code. In userland I have a more sophisticated version which combines TREC's and printf's in one macro with a "debug variable" defined as a set of bits controlling the enable/disable sets of these. For example: DPRP1(1 << 2, "This is a DPR with parameter=%d", xyz); #define DPRP1(__bits, __format, __param, ...) \ do { \ if (((__bits) << 16) & dbg_variable) \ printf((__format), __param...); \ if ((__bits) & dbg_variable) \ TREC1((__param)); \ } while (0) The above divides "dbg_variable" into 2 16 bit fields, the upper 16 bits control if the printf is enabled and the lower 16 bits controls if the TREC1 is enabled. What I envision happening would possibly to expose the "dbg_variable" via procfs where it could "easily" be modified from userland. Anyway, for my immediate needs I just needed the TREC's so I could understand the inner workings of the kernel better and assist in debugging. I've submitted it as a patch incase anyone might be interested. Actually, are you interested in using them? 
In any case, thanks for the feedback. Wink - To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH 2/7] Initial implementation of the trec driver and include files
Johannes Weiner wrote: Your patch has broken lines where there shouldn't be any. OK. + struct trec_buffer_struct * pNext; + struct trec_struct *pCur; + struct trec_struct *pEnd; Please don't use camel-case - in general. Would p_next, p_cur and p_end be OK? +static int snprint_address(char *b, int bsize, unsigned long address) +{ +#ifdef CONFIG_KALLSYMS + unsigned long offset = 0, symsize; +#else + return snprintf(b, bsize, "0x%016lx", address); +#endif +} Would it make sense to #ifdef the whole function? Make it static int (*)(...) if CONFIG_KALLSYMS and otherwise just a static inline int (*)(...) { return 0; } Maybe, but I think just letting the compiler decide is better. [...] +static int trec_device_init(void) +{ + int result; + DPK("trec_device_init: cdev_add failed\n"); + goto done; If you jump to `done' here, unregister_chrdev_region is never called. You should also declare trec_device_init as __init and trex_device_exit as __exit. I'll fix this. --- /dev/null +++ b/include/linux/trec.h @@ -0,0 +1,34 @@ +/* +#define TREC0()trec_write(TREC_PC_ADDR, TREC_PID, 0, 0) + +#define ZREC0()do { } while (0) Why not seperate them by an #ifndef? So you don't have to replace TREC? with ZREC? but rather change one #define knob. =Hannes The reason is to allow the user easily change individual TREC's from active to inactive by just changing a single character. Eventually I envision adding runtime support for changing if a particular set of TREC's are active or not, but this was simple. Thanks for the feed back. Wink - To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCH 7/7] Add trec_snapshot and trec_print_snapshot in panic()
Signed-off-by: Wink Saville <[EMAIL PROTECTED]> --- kernel/panic.c | 12 1 files changed, 12 insertions(+), 0 deletions(-) diff --git a/kernel/panic.c b/kernel/panic.c index 623d182..64a047e 100644 --- a/kernel/panic.c +++ b/kernel/panic.c @@ -20,6 +20,10 @@ #include #include +#ifdef CONFIG_TREC +#include +#endif + int panic_on_oops; int tainted; static int pause_on_oops; @@ -66,6 +70,10 @@ NORET_TYPE void panic(const char * fmt, ...) unsigned long caller = (unsigned long) __builtin_return_address(0); #endif +#ifdef CONFIG_TREC + trec_snapshot(); +#endif + /* * It's possible to come here directly from a panic-assertion and not * have preempt disabled. Some functions called from here want @@ -96,6 +104,10 @@ NORET_TYPE void panic(const char * fmt, ...) smp_send_stop(); #endif +#ifdef CONFIG_TREC + trec_print_snapshot(); +#endif + atomic_notifier_call_chain(&panic_notifier_list, 0, buf); if (!panic_blink) -- 1.5.0.rc2 - To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCH 3/7] Modifications to drivers/Kconfig and Makefile to configure
Signed-off-by: Wink Saville <[EMAIL PROTECTED]> --- drivers/Kconfig |2 ++ drivers/Makefile |1 + 2 files changed, 3 insertions(+), 0 deletions(-) diff --git a/drivers/Kconfig b/drivers/Kconfig index 050323f..f05a2bf 100644 --- a/drivers/Kconfig +++ b/drivers/Kconfig @@ -84,4 +84,6 @@ source "drivers/auxdisplay/Kconfig" source "drivers/kvm/Kconfig" +source "drivers/trec/Kconfig" + endmenu diff --git a/drivers/Makefile b/drivers/Makefile index 3a718f5..01724c0 100644 --- a/drivers/Makefile +++ b/drivers/Makefile @@ -81,3 +81,4 @@ obj-$(CONFIG_GENERIC_TIME)+= clocksource/ obj-$(CONFIG_DMA_ENGINE)+= dma/ obj-$(CONFIG_HID) += hid/ obj-$(CONFIG_PPC_PS3) += ps3/ +obj-$(CONFIG_TREC) += trec/ -- 1.5.0.rc2 - To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCH 4/7] Initialize trec early so it may be used early
Signed-off-by: Wink Saville <[EMAIL PROTECTED]> --- init/main.c |4 1 files changed, 4 insertions(+), 0 deletions(-) diff --git a/init/main.c b/init/main.c index a92989e..46bc440 100644 --- a/init/main.c +++ b/init/main.c @@ -54,6 +54,7 @@ #include #include #include +#include #include #include @@ -517,6 +518,9 @@ asmlinkage void __init start_kernel(void) early_boot_irqs_off(); early_init_irq_lock_class(); + trec_init(); + TREC0(); + /* * Interrupts are still disabled. Do necessary setups, then * enable them -- 1.5.0.rc2 - To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCH 5/7] Add trec_snapshot and trec_print_snapshot to die()
Signed-off-by: Wink Saville <[EMAIL PROTECTED]> --- arch/x86_64/kernel/traps.c | 12 1 files changed, 12 insertions(+), 0 deletions(-) diff --git a/arch/x86_64/kernel/traps.c b/arch/x86_64/kernel/traps.c index 09d2e8a..c730176 100644 --- a/arch/x86_64/kernel/traps.c +++ b/arch/x86_64/kernel/traps.c @@ -33,6 +33,10 @@ #include #include +#ifdef CONFIG_TREC +#include +#endif + #include #include #include @@ -547,9 +551,17 @@ void die(const char * str, struct pt_regs * regs, long err) { unsigned long flags = oops_begin(); +#ifdef CONFIG_TREC + trec_snapshot(); +#endif + if (!user_mode(regs)) report_bug(regs->rip); +#ifdef CONFIG_TREC + trec_print_snapshot(); +#endif + __die(str, regs, err); oops_end(flags); do_exit(SIGSEGV); -- 1.5.0.rc2 - To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCH 6/7] Added trec_snapshot and trec_print_snapshot to do_page_fault() when the kernel itself faults
Signed-off-by: Wink Saville <[EMAIL PROTECTED]> --- arch/x86_64/mm/fault.c | 10 ++ 1 files changed, 10 insertions(+), 0 deletions(-) diff --git a/arch/x86_64/mm/fault.c b/arch/x86_64/mm/fault.c index 6ada723..9857ade 100644 --- a/arch/x86_64/mm/fault.c +++ b/arch/x86_64/mm/fault.c @@ -25,6 +25,10 @@ #include #include +#ifdef CONFIG_TREC +#include +#endif + #include #include #include @@ -534,6 +538,9 @@ no_context: */ flags = oops_begin(); +#ifdef CONFIG_TREC + trec_snapshot(); +#endif if (address < PAGE_SIZE) printk(KERN_ALERT "Unable to handle kernel NULL pointer dereference"); @@ -548,6 +555,9 @@ no_context: __die("Oops", regs, error_code); /* Executive summary in case the body of the oops scrolled away */ printk(KERN_EMERG "CR2: %016lx\n", address); +#ifdef CONFIG_TREC + trec_print_snapshot(); +#endif oops_end(flags); do_exit(SIGKILL); -- 1.5.0.rc2 - To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCH 2/7] Initial implementation of the trec driver and include files
Signed-off-by: Wink Saville <[EMAIL PROTECTED]> --- drivers/trec/Kconfig | 14 ++ drivers/trec/Makefile |5 + drivers/trec/trec.c| 328 include/asm-generic/trec.h | 17 +++ include/asm-i386/trec.h| 33 + include/asm-x86_64/trec.h | 13 ++ include/linux/trec.h | 34 + 7 files changed, 444 insertions(+), 0 deletions(-) create mode 100644 drivers/trec/Kconfig create mode 100644 drivers/trec/Makefile create mode 100644 drivers/trec/trec.c create mode 100644 include/asm-generic/trec.h create mode 100644 include/asm-i386/trec.h create mode 100644 include/asm-x86_64/trec.h create mode 100644 include/linux/trec.h diff --git a/drivers/trec/Kconfig b/drivers/trec/Kconfig new file mode 100644 index 000..ef43f1f --- /dev/null +++ b/drivers/trec/Kconfig @@ -0,0 +1,14 @@ +# +# Trace record configuration +# + +menu "Trace record support" + +config TREC + def_bool n + bool "Trace record support" + ---help--- + Trace records are a light weight tracing facility + +endmenu + diff --git a/drivers/trec/Makefile b/drivers/trec/Makefile new file mode 100644 index 000..d930b4d --- /dev/null +++ b/drivers/trec/Makefile @@ -0,0 +1,5 @@ +# +# Makefile for Trace records. +# + +obj-$(CONFIG_TREC) += trec.o diff --git a/drivers/trec/trec.c b/drivers/trec/trec.c new file mode 100644 index 000..8d954ca --- /dev/null +++ b/drivers/trec/trec.c @@ -0,0 +1,328 @@ +/* + * Copyright (C) 2007 Saville Software, Inc. + * + * This code may be used for any purpose whatsoever, but + * no warranty of any kind is provided. + */ + +#include +#include +#include +#include +#include +#include + +#include + +#define TREC_DEBUG +#ifdef TREC_DEBUG +#define DPK(fmt, args...) printk(KERN_ERR "trec " fmt, ## args) +#else +#define DPK(fmt, args...) 
+#endif + +struct trec_dev_struct +{ + struct cdevcdev; /* Character device structure */ +}; + +MODULE_AUTHOR("Wink Saville"); +MODULE_LICENSE("Dual BSD/GPL"); + +/* + * Module parameters + */ +int major = 240; /* 240 a "local/expermental" device number for the moment */ +int minor = 1; + +module_param(major, int, S_IRUGO); +module_param(minor, int, S_IRUGO); + +/* + * Forward declarations + */ +static int trec_open(struct inode *inode, struct file *pFile); +static int trec_release(struct inode *inode, struct file *pFile); + +/* + * File operations + */ +struct file_operations trec_f_ops = { + .owner = THIS_MODULE, + .open = trec_open, + .release= trec_release, +}; + +struct trec_struct { + uint64_ttsc; + unsigned long pc; + unsigned long tsk; + unsigned intpid; + unsigned long v1; + unsigned long v2; +}; + +/* + * Change trec_buffer_struct.data to be a pointer to a PAGE in the future + */ +#define TREC_DATA_SIZE 0x200 +struct trec_buffer_struct { + struct trec_buffer_struct * pNext; + struct trec_struct *pCur; + struct trec_struct *pEnd; + struct trec_struct data[TREC_DATA_SIZE]; +}; + +/* + * Number of buffers must be a multiple of two so we can + * snapshot the buffers and the minimum should be 4. 
+ */ +#defineTREC_COUNT 2 +struct trec_buffer_struct gTrec_buffers[2][TREC_COUNT]; +intgTrec_idx = 0; +spinlock_t gTrec_lock = SPIN_LOCK_UNLOCKED; + +struct trec_buffer_struct *pTrec_cur = NULL; +struct trec_buffer_struct *pTrec_snapshot = NULL; + +struct trec_dev_struct trec_dev; + +/** + * Print an address symbol if available to the buffer + * this is from traps.c + */ +static int snprint_address(char *b, int bsize, unsigned long address) +{ +#ifdef CONFIG_KALLSYMS + unsigned long offset = 0, symsize; + const char *symname; + char *modname; + char *delim = ":"; + int n; + char namebuf[128]; + + symname = kallsyms_lookup(address, &symsize, &offset, &modname, namebuf); + if (!symname) { + n = 0; + } else { + if (!modname) + modname = delim = ""; + n = snprintf(b, bsize, "0x%016lx %s%s%s%s+0x%lx/0x%lx", + address, delim, modname, delim, symname, offset, symsize); + } + return n; +#else + return snprintf(b, bsize, "0x%016lx", address); + return 0; +#endif +} + +/* + * Initialize the trec buffers + */ +void trec_init(void) +{ + int i; + int j; + + //DPK("trec: trec_init E\n"); + + for (i = 0; i < 2; i++) { + for (j = 0; j < TREC_COUNT; j++) { + struct trec_buffer_struct *pTrec = &gTrec_buffers[i
[PATCH 1/7] Documentation for trace record (trec), a light weight tracing mechanism
Signed-off-by: Wink Saville <[EMAIL PROTECTED]> --- Documentation/trec.txt | 87 1 files changed, 87 insertions(+), 0 deletions(-) create mode 100644 Documentation/trec.txt diff --git a/Documentation/trec.txt b/Documentation/trec.txt new file mode 100644 index 000..e12a552 --- /dev/null +++ b/Documentation/trec.txt @@ -0,0 +1,87 @@ +Title : Trace Records +Authors: Wink Saville <[EMAIL PROTECTED]> + +CONTENTS + +1. Concepts +2. Architectures Supported +3. Configuring +4. API Reference +5. Overhead +6. TODO + + +1. Concepts + +Trace records are a light weight tracing technique that time stamps +small amounts of information and stores them in a buffer. TREC's are +light enough that they may be sprinkled most anywhere in the kernel +and have very little performance impact. + +For instance they can be placed in the scheduler and ISR's to watch +the interaction between ISR's and the scheduler. They can be placed +in memory handling routines to determine how and when memory is +allocated and freed. + +In the current default configuration the trec's are dumped by calling +trec_print_snapshot when die() or panic() are called as well as when +the kernel itself page faults in do_page_fault. + + +2. Architectures Supported + +Should support all architectures has been tested only on: + +- X86_64 + + +3. Configuring + +Since trec's are implemented as a device driver they are configured +by enabling support in the "Device Drivers" section of as they could +be used early being a module is not supported. + + +4. API Reference + +Trec supports the following API: + +void trec_init(void): + + Initialize the module, this may be called before the driver is loaded + if it is desired to use trec's early. + +void trec_write(unsigned long pc, int pid, unsigned long v1, unsigned long v2); + + This is the routine used to write into the buffer. pc is the program counter + pid is the process id and v1 and v2 are two parameters. 
+ +void trec_snapshot(void); + + Calling this function takes a snapshot of the current trec buffer so that it + will not be modified. This is called prior to printing the snapshot via + trec_print_snapshot. + +void trec_print_snapshot(void); + + Print the snapshot. + +In addition a set of macros are defined for convenience, they come in +two flavors, TRECxxx and ZRECxxx. The TRECxxx macros invoke trec_write +and the ZRECxxx macros do nothing allowing the macros to be quickly +disabled. Look at include/linux/trec.h for the current set of macros. + + +5. Overhead + +Measured on a 2.4GHZ Core 2 Duo the readings between two TREC's is +270 ticks of the rdtsc or about 0.1us. No attempt has been made to +optimize and less information can be collected if the overhead +is still too high. + + +6. TODO + +a. Add code to dump trec to user space +b. Enhance to allow runtime registration and runtime enable disable. + -- 1.5.0.rc2 - To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [RFC] Asynchronous Messaging
On 1/22/07, Alan <[EMAIL PROTECTED]> wrote: > This is accomplished by allocating a page (or more) of memory which > is executable and mapped into every threads address space. Also, all > ISR entry points are modified to detect if the code that was interrupted > was executing within the ACE page. If it was then the ACE code is > allowed to complete before the ISR continues. This then provides > the guarantee of atomic execution. What if you enter the ISR, pass the point of the check and then another CPU core hits the ACE space ? If CPU A has passed the point of the check then by definition the lock in the ACE space that it was holding will have been released and be available to CPU B, thus there will be no contention and CPU B will proceed to execute the code within the ACE space. Also how do you handle the case where the code gets stuck in your atomic pages ? The code in the ACE space must execute quickly and must never get stuck, the same rules as any code which holds spin locks. As I envision it the ACE space is "micro-code" provided by only the kernel and thus is bug free. Of course shit happens, for example I use ACE to manipulate shared linked lists. What happens if a pointer passed to the ACE code caused a page fault? This will cause the ISR to be reentered and is definitely a problem. But this can be detected and "fixed-up", i.e. release the spin lock and mark the faulting code to be killed and not rescheduled. My proof of concept code does not handle this situation but I believe it can be handled. A similar problem might occur if buggy or malicious code were to begin executing in the "middle" of the ACE space rather than at one of its entry points. Protection will need to be put in place to handle this also. For instance if N ISR's in a row detect that the ACE space code has never stopped executing then kill the erroneous thread. Another idea would be to only allow "approved" code to use ACE. 
Wink - To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[RFC] Asynchronous Messaging
I have implemented a technique which allows a kernel-space thread or ISR to communicate with user-space or kernel-space threads asynchronously and without having to copy data (zero copy). The solution I came up with I call ACE, Atomic Code Execution. As the name implies once code starts executing within the ACE environment, that code is guaranteed to complete before any other code will run. This is accomplished by allocating a page (or more) of memory which is executable and mapped into every thread's address space. Also, all ISR entry points are modified to detect if the code that was interrupted was executing within the ACE page. If it was then the ACE code is allowed to complete before the ISR continues. This then provides the guarantee of atomic execution. Another way to look at it is that it gives user space programs the capability to disable/enable interrupts thus allowing user space code to execute the equivalent of spin_lock_irqsave() and spin_unlock_irqrestore(). I then implemented asynchronous messaging with zero copy by implementing linked list operations within the ACE page, allocating the messages and auxiliary memory globally using vmalloc and adding the notion of a mproc (message processor) which encapsulates a thread and a queue. I believe the ACE technique and the mproc idea could be used for several purposes beyond my desire to write event driven applications. In particular I could see it as a means of implementing device drivers written in user space as well as a possible technique for communicating with virtual machines such as Xen or KVM. Currently, the proof of concept code runs on a Core 2 Duo. For those that are interested the code is available as a patch against 2.6.19 at http://www.saville.com/linux/async. I have been using asynchronous messaging for 4+ years and have found that it provides very interesting properties, but is hindered because it is not directly supported by operating systems. 
I am very interested in getting feedback on the idea of including asynchronous messaging within the kernel. Thank you, Wink Saville - To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [patch] x86: unify/rewrite SMP TSC sync code
Arjan van de Ven wrote: just to make sure, you do realize that when you write "ticks" that rdtsc doesn't measure cpu clock ticks or cpu cycles anymore, right? (At least not on your machine) Yes, that's why I wrote ticks and not cycles. At this point I'm not sure how to convert ticks to time on my machine, something else to learn:) Hopefully the HPET will resolve all of the issues. Wink - To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [patch] x86: unify/rewrite SMP TSC sync code
Arjan van de Ven wrote: it's the cost of a syscall (1000 cycles?) plus what it takes to get a reasonable time estimate. Assuming your kernel has enough time support AND your tsc is reasonably ok, it'll be using that. If it's NOT using that then that's a pretty good sign that you can't also use it in userspace I wrote a quick and dirty program that I've attached to test the cost difference between RDTSC and gettimeofday (gtod), the results: [EMAIL PROTECTED]:~/linux/linux-2.6/test/rdtsc-pref$ time ./rdtsc-pref 1 rdtsc: average ticks= 65 gtod:average ticks= 222 gtod_us: average ticks= 232 real0m36.002s user0m35.997s sys 0m0.000s About a 3.5x cost difference, still for most of my uses gtod was not as costly as I had supposed. But, there are other uses that it wouldn't be acceptable. For instance, I have used a memory mapped time stamp counter in an embedded ARM based system for instrumenting the interrupt service routine, syscalls and task switches. For this type of instrumentation a gtod type call wouldn't have been suitable. Anyway for x86_64 systems, if I can use a memory mapped HPET counter, I might be able to have my cake and eat it too. One counter that can be used inside and outside the kernel that is cheap, precise and accurate, nirvana! We'll have to see. BTW my system is a 2.4ghz Core 2 duo running 2.6.19-rc6 with HPET enabled, in the attachment I've included my config file. Cheers, Wink rdtsc-pref.tgz Description: Binary data
Re: [patch] x86: unify/rewrite SMP TSC sync code
Robert Hancock wrote: Actually, we need to ask the CPU/System makers to provide a system wide Generally user mode code should just be using gettimeofday. When the TSC is usable as a sane time source, the kernel will use it. When it's not, it will use something else like the HPET, ACPI PM Timer or (at last resort) the PIT, in increasing degrees of slowness. But gettimeofday is much too expensive compared to RDTSC. Wink - To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/