On Wed, 2006-04-12 at 18:56 +0200, Andi Kleen wrote:
> Peter Wainwright <[EMAIL PROTECTED]> writes:
> > Instead of executing all the GPE and Notify handlers in the single
> > kacpid thread, we create a pool of worker threads and hand over the
> > work to them.
>
> You do this at boot - this means you waste a lot of memory for this
> obscure case. Not a good idea. If anything create/destroy them
> dynamically.
Eh? The threads *are* created dynamically - and only if the user first
enables this behaviour by writing to a /proc filesystem entry (e.g. in an
init script). NO additional memory (except for the code itself) is used
unless the user explicitly enables this behaviour.

Also, I don't regard this as an "obscure case". Even if this bug were never
triggered by any current hardware, it represents a divergence from the ACPI
spec which could potentially bite any new hardware. We can't claim that
Linux supports the full ACPI spec without some similar patch.

> Also externs belong in header files.

You're thinking of the declaration of acpi_os_initialize2? Well, there was
an existing extern declaration, acpi_os_initialize1 - I just added another
one :-). I don't want to expose this in a header because it is a purely
private interface. The neatest solution would be to make an __initcall
entry. However, this wouldn't work at the moment, because
acpi_os_initialize2 creates an entry in /proc/acpi, which does not exist
until the bus.c initialization has run. Is it possible to swap the order of
bus.c and osl.c in the link line (I assume this swaps the order of the
__initcalls)?

Anyway, here is the second version of my patch. It has been tested pretty
thoroughly against 2.6.16.4, and applies cleanly to 2.6.16.11.

As promised, the thread pool entries are now destroyed dynamically: if no
ACPI events occur for 10 seconds, all the extra threads disappear.

I have added a kernel boot option "acpi_pool_size=". This is necessary
because a thermal trip may occur between booting and running any init
script that writes /proc/acpi/pool_size (particularly if the system was
warm to start with), so the thread pool needs to be enabled at boot time.

I renamed /proc/acpi/poolsize to /proc/acpi/pool_size for greater
readability and consistency with other filenames.

I downgraded the debugging messages to ACPI_DB_INFO level, so they are not
printed by default. The patch is now very robust for me, so I don't think
we should need them.

Peter

> -Andi

Signed-off-by: Peter R Wainwright <[EMAIL PROTECTED]>
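For reference (this is not part of the patch, and the pool size of 4 below
is purely illustrative, not a recommendation), enabling the pool works
either by passing the boot option on the kernel command line, so the pool
is active from early boot:

  acpi_pool_size=4

or later, e.g. from an init script:

  echo 4 > /proc/acpi/pool_size

Reading the file back shows max_pool_size, current_pool_size and the
per-thread entries:

  cat /proc/acpi/pool_size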
diff -u -r linux-2.6.16.4-prw3-old/drivers/acpi/bus.c linux-2.6.16.4-prw3/drivers/acpi/bus.c
--- linux-2.6.16.4-prw3-old/drivers/acpi/bus.c	2006-04-21 12:18:11.000000000 +0100
+++ linux-2.6.16.4-prw3/drivers/acpi/bus.c	2006-04-21 12:18:24.000000000 +0100
@@ -664,6 +664,7 @@
 	int result = 0;
 	acpi_status status = AE_OK;
 	extern acpi_status acpi_os_initialize1(void);
+	extern acpi_status acpi_os_initialize2(void);
 
 	ACPI_FUNCTION_TRACE("acpi_bus_init");
 
@@ -727,6 +728,8 @@
 	 */
 	acpi_root_dir = proc_mkdir(ACPI_BUS_FILE_ROOT, NULL);
 
+	status = acpi_os_initialize2();
+
 	return_VALUE(0);
 
 	/* Mimic structured exception handling */
diff -u -r linux-2.6.16.4-prw3-old/drivers/acpi/osl.c linux-2.6.16.4-prw3/drivers/acpi/osl.c
--- linux-2.6.16.4-prw3-old/drivers/acpi/osl.c	2006-04-21 12:18:11.000000000 +0100
+++ linux-2.6.16.4-prw3/drivers/acpi/osl.c	2006-04-27 18:58:07.000000000 +0100
@@ -46,6 +46,10 @@
 
 #include <linux/efi.h>
 
+#include <linux/kthread.h>
+#include <linux/proc_fs.h>
+#include <linux/seq_file.h>
+
 #define _COMPONENT		ACPI_OS_SERVICES
 ACPI_MODULE_NAME("osl")
 #define PREFIX		"ACPI: "
@@ -75,6 +79,213 @@
 static acpi_osd_handler acpi_irq_handler;
 static void *acpi_irq_context;
 static struct workqueue_struct *kacpid_wq;
+struct timer_list acpi_reap_timer;
+
+static int reap_interval = 5000;
+
+/* This parameter may be configured at runtime by writing to
+   /proc/acpi/pool_size */
+static int max_pool_size = 0;
+static int __init acpi_set_pool_size(char *str)
+{
+	get_option(&str, &max_pool_size);
+	return 0;
+}
+__setup("acpi_pool_size=", acpi_set_pool_size);
+
+/* Hopefully we need no special locking here, since this always
+   executes in one thread: kacpid */
+static int current_pool_size = 0;
+static struct list_head thread_pool = LIST_HEAD_INIT(thread_pool);
+
+struct thread_pool_entry {
+	struct task_struct *thread;
+	int id;
+	int active;
+	acpi_osd_exec_callback function;
+	void *context;
+	unsigned long jiffies;
+	wait_queue_head_t more_work;
+	wait_queue_head_t work_done;
+	struct list_head list;
+};
+
+static int acpi_osl_pool_size_read(struct seq_file *seq, void *offset)
+{
+	struct list_head *l;
+
+	ACPI_FUNCTION_TRACE("acpi_osl_pool_size_read");
+
+	seq_printf(seq, "max_pool_size=%d\n", max_pool_size);
+	seq_printf(seq, "current_pool_size=%d\n", current_pool_size);
+	/* FIXME: Need lock? */
+	list_for_each(l, &thread_pool) {
+		struct thread_pool_entry *e = list_entry(l, struct thread_pool_entry, list);
+		seq_printf(seq, "entry=(%p,%d,%p,%p,%lu)\n",
+			   e->thread, e->active, e->function, e->context, e->jiffies);
+	}
+	return_VALUE(0);
+}
+
+static int acpi_osl_pool_size_open_fs(struct inode *inode, struct file *file)
+{
+	return single_open(file, acpi_osl_pool_size_read, PDE(inode)->data);
+}
+
+static ssize_t
+acpi_osl_pool_size_write(struct file *file,
+			 const char __user * buffer,
+			 size_t count, loff_t * ppos)
+{
+	char *size_string;
+
+	ACPI_FUNCTION_TRACE("acpi_osl_pool_size_write");
+
+	size_string = kmalloc(4096, GFP_KERNEL);
+	if (!size_string)
+		return_VALUE(-ENOMEM);
+
+	memset(size_string, 0, 4096);
+
+	if (copy_from_user(size_string, buffer, count)) {
+		ACPI_DEBUG_PRINT((ACPI_DB_ERROR, "Invalid data\n"));
+		count = -EFAULT;
+		goto end;
+	}
+
+	size_string[count] = '\0';
+	max_pool_size = simple_strtol(size_string, NULL, 10);
+	ACPI_DEBUG_PRINT((ACPI_DB_INFO, "Pool_Size now %d\n", max_pool_size));
+
+end:
+	kfree(size_string);
+	return_VALUE(count);
+}
+
+static struct file_operations acpi_osl_pool_size_ops = {
+	.open = acpi_osl_pool_size_open_fs,
+	.read = seq_read,
+	.write = acpi_osl_pool_size_write,
+	.llseek = seq_lseek,
+	.release = single_release,
+};
+
+/* This is based on workqueue worker_thread */
+static int acpi_os_execute_worker(void *context)
+{
+	struct thread_pool_entry *e = (struct thread_pool_entry *)context;
+	DECLARE_WAITQUEUE(wait, current);
+
+	set_user_nice(current, -5);
+
+	set_current_state(TASK_INTERRUPTIBLE);
+
+	ACPI_DEBUG_PRINT((ACPI_DB_INFO, "[%d:%p] before loop\n", e->id, e));
+	while (!kthread_should_stop()) {
+		/* Wait until there is work to do in this thread */
+		add_wait_queue(&e->more_work, &wait);
+		if (!e->function) {
+			ACPI_DEBUG_PRINT((ACPI_DB_INFO, "[%d:%p] sleeping\n", e->id, e));
+			schedule();
+		}
+		else {
+			__set_current_state(TASK_RUNNING);
+		}
+		try_to_freeze();
+		remove_wait_queue(&e->more_work, &wait);
+		if (e->function) {
+			acpi_osd_exec_callback function = e->function;
+			void *context = e->context;
+			e->function = NULL;
+			e->context = NULL;
+			ACPI_DEBUG_PRINT((ACPI_DB_INFO, "[%d:%p] executing function=%p context=%p\n",
+					  e->id, e, function, context));
+			function(context);
+
+			e->active = 0;
+			e->jiffies = jiffies;
+			ACPI_DEBUG_PRINT((ACPI_DB_INFO, "[%d:%p] exit function=%p context=%p at %lx\n",
+					  e->id, e, function, context, e->jiffies));
+
+		}
+		set_current_state(TASK_INTERRUPTIBLE);
+	}
+	ACPI_DEBUG_PRINT((ACPI_DB_INFO, "[%d:%p] stopped\n", e->id, e));
+
+	return 0;
+}
+
+static struct work_struct acpi_reap_task;
+
+static void acpi_queue_reap_threads(unsigned long data);
+
+static void acpi_reap_threads(void *data)
+{
+	struct list_head *l, *n;
+
+	/* FIXME: Need lock? */
+	list_for_each_safe(l, n, &thread_pool) {
+		struct thread_pool_entry *e = list_entry(l, struct thread_pool_entry, list);
+		if (!e->active && (jiffies - e->jiffies) > 10*HZ) {
+			int status;
+			ACPI_DEBUG_PRINT((ACPI_DB_INFO, "acpi_reap_threads REAP [%d:%p]\n", e->id, e));
+			list_del(l);
+			current_pool_size--;
+			status = kthread_stop(e->thread);
+			ACPI_DEBUG_PRINT((ACPI_DB_INFO, "acpi_reap_threads kthread_stop returned %d\n", status));
+			kfree(e);
+		}
+	}
+	if (!timer_pending(&acpi_reap_timer)) {
+		acpi_reap_timer.function = acpi_queue_reap_threads;
+		acpi_reap_timer.data = 0;
+		acpi_reap_timer.expires = jiffies + (HZ * reap_interval) / 1000;
+		add_timer(&(acpi_reap_timer));
+	}
+}
+
+static void acpi_queue_reap_threads(unsigned long data)
+{
+	queue_work(kacpid_wq, &acpi_reap_task);
+}
+
+static struct thread_pool_entry *get_pool_thread(void)
+{
+	struct list_head *l;
+	list_for_each(l, &thread_pool) {
+		struct thread_pool_entry *e = list_entry(l, struct thread_pool_entry, list);
+		if (!e->active) {
+			e->active = 1;
+			return e;
+		}
+	}
+	/* N.B. For the time being we have a user-configurable limit
+	   on the current_pool_size. We don't want a bad BIOS to create
+	   unlimited kthreads. */
+	if (current_pool_size < max_pool_size) {
+		struct thread_pool_entry *e;
+		e = kmalloc(sizeof(struct thread_pool_entry), GFP_KERNEL);
+		list_add_tail(&e->list, &thread_pool);
+		e->id = current_pool_size;
+		init_waitqueue_head(&e->more_work);
+		init_waitqueue_head(&e->work_done);
+		e->function = NULL;
+		e->context = NULL;
+		e->active = 1;
+		/* kthread_run does kthread_create and wake_up_process */
+		e->thread = kthread_run(acpi_os_execute_worker, e, "kacpid-work-%d",
+					current_pool_size);
+		current_pool_size++;
+		if (!timer_pending(&acpi_reap_timer)) {
+			acpi_reap_timer.function = acpi_queue_reap_threads;
+			acpi_reap_timer.data = 0;
+			acpi_reap_timer.expires = jiffies + (HZ * reap_interval) / 1000;
+			add_timer(&(acpi_reap_timer));
+		}
+		return e;
+	}
+	return NULL;
+}
 
 acpi_status acpi_os_initialize(void)
 {
@@ -98,6 +309,18 @@
 	return AE_OK;
 }
 
+/* enable pool_size modification by /proc filesystem */
+acpi_status acpi_os_initialize2(void)
+{
+	struct proc_dir_entry *entry;
+	entry = create_proc_entry("pool_size", S_IRUSR|S_IWUSR, acpi_root_dir);
+	if (entry)
+		entry->proc_fops = &acpi_osl_pool_size_ops;
+	INIT_WORK(&acpi_reap_task, acpi_reap_threads, NULL);
+	init_timer(&acpi_reap_timer);
+	return AE_OK;
+}
+
 acpi_status acpi_os_terminate(void)
 {
 	if (acpi_irq_handler) {
@@ -661,6 +884,7 @@
 static void acpi_os_execute_deferred(void *context)
 {
 	struct acpi_os_dpc *dpc = NULL;
+	struct thread_pool_entry *e = NULL;
 
 	ACPI_FUNCTION_TRACE("os_execute_deferred");
 
@@ -670,8 +894,17 @@
 		return_VOID;
 	}
 
-	dpc->function(dpc->context);
-
+	if (max_pool_size > 0 && (e = get_pool_thread())) {
+		ACPI_DEBUG_PRINT((ACPI_DB_INFO, "[%d:%p] function=%p context=%p\n",
+				  e->id, e, dpc->function, dpc->context));
+		e->context = dpc->context;
+		e->function = dpc->function;
+		wake_up(&e->more_work);
+	}
+	else {
+		ACPI_DEBUG_PRINT((ACPI_DB_INFO, "[inline] function=%p context=%p\n", dpc->function, dpc->context));
+		dpc->function(dpc->context);
+	}
 	kfree(dpc);
 
 	return_VOID;