I suppose I should let Andrea submit these, but he has such a huge patch collection (thank you!) that I thought it might be useful to pick out some of the smaller ones that would be less controversial for inclusion in the main kernel. * nanosleep-4 Provide nanosleep usec resolution so that a signal flood doesn't hang glibc folks that correctly trust the rem field to resume the nanosleep after a syscall interruption. (without the patch nanosleep resolution is instead 10msec on IA32 and around 1msec on alpha) * tsc-calibration-non-compile-time-1 TSC calibration must be dynamic and not a compile time thing because gettimeofday is dynamic and it depends on the TSCs to be in sync. * IO-wait-2 Avoid spurious unplug of the I/O queue. * buf-run_task_queue Avoid spurious unplug of the I/O queue. (again!) * account-failed-buffer-tries-1 Account also for failed buffer tries during shrink_mmap. * overcommit-1 Make sure to not underestimate the available memory (the cache and buffers may be under the min percent). -- Chip Salzenberg - a.k.a. - <[EMAIL PROTECTED]> "I wanted to play hopscotch with the impenetrable mystery of existence, but he stepped in a wormhole and had to go in early." // MST3K
Tag: KERNEL-2-2-18-PRE11-PATCH-6 Patch: nanosleep-4 From: Andrea Arcangeli <[EMAIL PROTECTED]> Provide nanosleep usec resolution so that a signal flood doesn't hang glibc folks that correctly trust the rem field to resume the nanosleep after a syscall interruption. (without the patch nanosleep resolution is instead 10msec on IA32 and around 1msec on alpha) Index: linux/arch/alpha/kernel/time.c diff -u linux/arch/alpha/kernel/time.c:1.2 linux/arch/alpha/kernel/time.c:1.2.6.1 --- linux/arch/alpha/kernel/time.c:1.2 Wed Sep 13 08:32:22 2000 +++ linux/arch/alpha/kernel/time.c Wed Sep 27 23:55:48 2000 @@ -339,8 +339,22 @@ irq_handler = timer_interrupt; if (request_irq(TIMER_IRQ, irq_handler, 0, "timer", NULL)) panic("Could not allocate timer IRQ!"); + do_get_fast_time = do_gettimeofday; } +static inline void +timeval_normalize(struct timeval * tv) +{ + time_t __sec; + + __sec = tv->tv_usec / 1000000; + if (__sec) + { + tv->tv_usec %= 1000000; + tv->tv_sec += __sec; + } +} + /* * Use the cycle counter to estimate an displacement from the last time * tick. Unfortunately the Alpha designers made only the low 32-bits of @@ -389,13 +403,11 @@ #endif usec += delta_usec; - if (usec >= 1000000) { - sec += 1; - usec -= 1000000; - } tv->tv_sec = sec; tv->tv_usec = usec; + + timeval_normalize(tv); } void Index: linux/arch/i386/kernel/time.c diff -u linux/arch/i386/kernel/time.c:1.2 linux/arch/i386/kernel/time.c:1.2.6.1 --- linux/arch/i386/kernel/time.c:1.2 Wed Sep 13 08:32:22 2000 +++ linux/arch/i386/kernel/time.c Wed Sep 27 23:55:48 2000 @@ -239,6 +239,20 @@ #endif +/* FIXME: should be inline but gcc is buggy and breaks */ +static void +timeval_normalize(struct timeval * tv) +{ + time_t __sec; + + __sec = tv->tv_usec / 1000000; + if (__sec) + { + tv->tv_usec %= 1000000; + tv->tv_sec += __sec; + } +} + /* * This version of gettimeofday has microsecond resolution * and better than microsecond precision on fast x86 machines with TSC. 
@@ -259,13 +273,10 @@ usec += xtime.tv_usec; read_unlock_irqrestore(&xtime_lock, flags); - while (usec >= 1000000) { - usec -= 1000000; - sec++; - } - tv->tv_sec = sec; tv->tv_usec = usec; + + timeval_normalize(tv); } void do_settimeofday(struct timeval *tv) Index: linux/arch/ppc/kernel/time.c diff -u linux/arch/ppc/kernel/time.c:1.2 linux/arch/ppc/kernel/time.c:1.2.14.1 --- linux/arch/ppc/kernel/time.c:1.2 Thu Jul 6 22:41:19 2000 +++ linux/arch/ppc/kernel/time.c Wed Sep 27 23:55:48 2000 @@ -147,6 +147,19 @@ hardirq_exit(cpu); } +static inline void +timeval_normalize(struct timeval * tv) +{ + time_t __sec; + + __sec = tv->tv_usec / 1000000; + if (__sec) + { + tv->tv_usec %= 1000000; + tv->tv_sec += __sec; + } +} + /* * This version of gettimeofday has microsecond resolution. */ @@ -161,10 +174,7 @@ #ifndef __SMP__ tv->tv_usec += (decrementer_count - get_dec()) * count_period_num / count_period_den; - if (tv->tv_usec >= 1000000) { - tv->tv_usec -= 1000000; - tv->tv_sec++; - } + timeval_normalize(tv); #endif restore_flags(flags); } Index: linux/include/linux/time.h diff -u linux/include/linux/time.h:1.1 linux/include/linux/time.h:1.1.14.1 --- linux/include/linux/time.h:1.1 Thu Jul 6 22:04:59 2000 +++ linux/include/linux/time.h Wed Sep 27 23:55:48 2000 @@ -46,10 +46,53 @@ value->tv_sec = jiffies / HZ; } +static __inline__ int +timespec_before(struct timespec a, struct timespec b) +{ + if (a.tv_sec == b.tv_sec) + return a.tv_nsec < b.tv_nsec; + return a.tv_sec < b.tv_sec; +} + +/* computes `a - b' and write the result in `result', assumes `a >= b' */ +static inline void +timespec_less(struct timespec a, struct timespec b, struct timespec * result) +{ + if (a.tv_nsec < b.tv_nsec) + { + a.tv_sec--; + a.tv_nsec += 1000000000; + } + + result->tv_sec = a.tv_sec - b.tv_sec; + result->tv_nsec = a.tv_nsec - b.tv_nsec; +} + struct timeval { time_t tv_sec; /* seconds */ suseconds_t tv_usec; /* microseconds */ }; + +/* computes `a - b' and write the result in `result', assumes `a 
>= b' */ +static inline void +timeval_less(struct timeval a, struct timeval b, struct timeval * result) +{ + if (a.tv_usec < b.tv_usec) + { + a.tv_sec--; + a.tv_usec += 1000000; + } + + result->tv_sec = a.tv_sec - b.tv_sec; + result->tv_usec = a.tv_usec - b.tv_usec; +} + +static __inline__ void +timeval_to_timespec(struct timeval tv, struct timespec * ts) +{ + ts->tv_sec = tv.tv_sec; + ts->tv_nsec = (long) tv.tv_usec * 1000; +} struct timezone { int tz_minuteswest; /* minutes west of Greenwich */ Index: linux/kernel/sched.c diff -u linux/kernel/sched.c:1.2.10.2 linux/kernel/sched.c:1.2.10.3 --- linux/kernel/sched.c:1.2.10.2 Wed Sep 27 23:55:16 2000 +++ linux/kernel/sched.c Wed Sep 27 23:55:48 2000 @@ -1946,6 +1946,7 @@ { struct timespec t; unsigned long expire; + struct timeval before, after; if(copy_from_user(&t, rqtp, sizeof(struct timespec))) return -EFAULT; @@ -1970,11 +1971,20 @@ expire = timespec_to_jiffies(&t) + (t.tv_sec || t.tv_nsec); current->state = TASK_INTERRUPTIBLE; + get_fast_time(&before); expire = schedule_timeout(expire); + get_fast_time(&after); if (expire) { if (rmtp) { - jiffies_to_timespec(expire, &t); + struct timespec elapsed; + + timeval_less(after, before, &after); + timeval_to_timespec(after, &elapsed); + if (timespec_before(elapsed, t)) + timespec_less(t, elapsed, &t); + else + t.tv_nsec = t.tv_sec = 0; if (copy_to_user(rmtp, &t, sizeof(struct timespec))) return -EFAULT; }
Tag: KERNEL-2-2-18-PRE11-PATCH-17 Patch: tsc-calibration-non-compile-time-1 From: Andrea Arcangeli <[EMAIL PROTECTED]> TSC calibration must be dynamic and not a compile time thing because gettimeofday is dynamic and it depends on the TSCs to be in sync. Index: linux/arch/i386/kernel/smp.c diff -u linux/arch/i386/kernel/smp.c:1.3.6.1 linux/arch/i386/kernel/smp.c:1.3.6.2 --- linux/arch/i386/kernel/smp.c:1.3.6.1 Wed Sep 27 23:54:02 2000 +++ linux/arch/i386/kernel/smp.c Thu Sep 28 00:03:31 2000 @@ -795,7 +795,6 @@ return memory_start; } -#ifdef CONFIG_X86_TSC /* * TSC synchronization. * @@ -995,8 +994,6 @@ } #undef NR_LOOPS -#endif - extern void calibrate_delay(void); void __init smp_callin(void) @@ -1083,12 +1080,11 @@ */ set_bit(cpuid, (unsigned long *)&cpu_callin_map[0]); -#ifdef CONFIG_X86_TSC /* * Synchronize the TSC with the BP */ - synchronize_tsc_ap (); -#endif + if (boot_cpu_data.x86_capability & X86_FEATURE_TSC) + synchronize_tsc_ap (); } int cpucount = 0; @@ -1624,13 +1620,11 @@ smp_done: -#ifdef CONFIG_X86_TSC /* * Synchronize the TSC with the AP */ - if (cpucount) + if (boot_cpu_data.x86_capability & X86_FEATURE_TSC && cpucount) synchronize_tsc_bp(); -#endif } /*
Tag: KERNEL-2-2-18-PRE11-PATCH-8 Patch: IO-wait-2 From: Andrea Arcangeli <[EMAIL PROTECTED]> Avoid spurious unplug of the I/O queue. Index: linux/fs/buffer.c diff -u linux/fs/buffer.c:1.2 linux/fs/buffer.c:1.2.14.1 --- linux/fs/buffer.c:1.2 Thu Jul 6 22:41:37 2000 +++ linux/fs/buffer.c Wed Sep 27 23:57:09 2000 @@ -143,13 +143,14 @@ bh->b_count++; wait.task = tsk; add_wait_queue(&bh->b_wait, &wait); -repeat: - tsk->state = TASK_UNINTERRUPTIBLE; - run_task_queue(&tq_disk); - if (buffer_locked(bh)) { + do { + run_task_queue(&tq_disk); + tsk->state = TASK_UNINTERRUPTIBLE; + mb(); + if (!buffer_locked(bh)) + break; schedule(); - goto repeat; - } + } while (buffer_locked(bh)); tsk->state = TASK_RUNNING; remove_wait_queue(&bh->b_wait, &wait); bh->b_count--; Index: linux/mm/filemap.c diff -u linux/mm/filemap.c:1.4 linux/mm/filemap.c:1.4.4.1 --- linux/mm/filemap.c:1.4 Thu Sep 21 19:43:11 2000 +++ linux/mm/filemap.c Wed Sep 27 23:57:09 2000 @@ -357,13 +357,14 @@ wait.task = tsk; add_wait_queue(&page->wait, &wait); -repeat: - tsk->state = TASK_UNINTERRUPTIBLE; - sync_page(page); - if (PageLocked(page)) { + do { + sync_page(page); + tsk->state = TASK_UNINTERRUPTIBLE; + mb(); + if (!PageLocked(page)) + break; schedule(); - goto repeat; - } + } while (PageLocked(page)); tsk->state = TASK_RUNNING; remove_wait_queue(&page->wait, &wait); }
Tag: KERNEL-2-2-18-PRE11-PATCH-14 Patch: buf-run_task_queue From: Andrea Arcangeli <[EMAIL PROTECTED]> Avoid spurious unplug of the I/O queue. Index: linux/fs/buffer.c diff -u linux/fs/buffer.c:1.2.14.1 linux/fs/buffer.c:1.2.14.2 --- linux/fs/buffer.c:1.2.14.1 Wed Sep 27 23:57:09 2000 +++ linux/fs/buffer.c Thu Sep 28 00:01:06 2000 @@ -1758,7 +1758,6 @@ if (ncount) printk("sync_old_buffers: %d dirty buffers not on dirty list\n", ncount); printk("Wrote %d/%d buffers\n", nwritten, ndirty); #endif - run_task_queue(&tq_disk); return 0; }
Tag: KERNEL-2-2-18-PRE11-PATCH-9 Patch: account-failed-buffer-tries-1 From: Andrea Arcangeli <[EMAIL PROTECTED]> Account also the failed buffer tries during shrink_mmap. Index: linux/mm/filemap.c diff -u linux/mm/filemap.c:1.4.4.1 linux/mm/filemap.c:1.4.4.2 --- linux/mm/filemap.c:1.4.4.1 Wed Sep 27 23:57:09 2000 +++ linux/mm/filemap.c Wed Sep 27 23:57:48 2000 @@ -188,6 +188,8 @@ if ((gfp_mask & __GFP_DMA) && !PageDMA(page)) continue; + count--; + /* * Is it a page swap page? If so, we want to * drop it if it is no longer used, even if it @@ -233,7 +235,7 @@ return 1; } - } while (--count > 0); + } while (count > 0); return 0; }
Tag: KERNEL-2-2-18-PRE11-PATCH-11 Patch: overcommit-1 From: Andrea Arcangeli <[EMAIL PROTECTED]> Make sure to not underestimate the available memory (the cache and buffers may be under the min percent). Index: linux/mm/mmap.c diff -u linux/mm/mmap.c:1.2 linux/mm/mmap.c:1.2.4.1 --- linux/mm/mmap.c:1.2 Thu Sep 21 19:43:11 2000 +++ linux/mm/mmap.c Wed Sep 27 23:59:06 2000 @@ -66,7 +66,6 @@ free += page_cache_size; free += nr_free_pages; free += nr_swap_pages; - free -= (page_cache.min_percent + buffer_mem.min_percent + 2)*num_physpages/100; return free > pages; }